{"doc_id": 0, "native_id": "Mercury_7175875", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4525048732757568, "logits_per_token_corr": -1.4525048732757568, "logits_per_char_corr": -0.7262524366378784, "bits_per_byte_corr": 1.0477607887717462}, "model_output": [{"sum_logits": -1.6214826107025146, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.6214826107025146, "logits_per_char": -0.8107413053512573, "bits_per_byte": 1.169652460674944, "num_chars": 2}, {"sum_logits": -0.9533479809761047, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -0.9533479809761047, "logits_per_char": -0.47667399048805237, "bits_per_byte": 0.6876952021983601, "num_chars": 2}, {"sum_logits": -1.4525048732757568, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4525048732757568, "logits_per_char": -0.7262524366378784, "bits_per_byte": 1.0477607887717462, "num_chars": 2}, {"sum_logits": -1.7795426845550537, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.7795426845550537, "logits_per_char": -0.8897713422775269, "bits_per_byte": 1.2836687030298266, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1, "native_id": "Mercury_SC_409171", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1545711755752563, "logits_per_token_corr": -1.1545711755752563, "logits_per_char_corr": -0.5772855877876282, "bits_per_byte_corr": 0.8328470546784799}, "model_output": [{"sum_logits": -1.5218788385391235, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.5218788385391235, "logits_per_char": -0.7609394192695618, "bits_per_byte": 1.0978035265979131, "num_chars": 2}, {"sum_logits": -1.1545711755752563, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.1545711755752563, "logits_per_char": -0.5772855877876282, "bits_per_byte": 0.8328470546784799, "num_chars": 2}, {"sum_logits": -1.2845512628555298, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.2845512628555298, "logits_per_char": -0.6422756314277649, "bits_per_byte": 0.9266078683453298, "num_chars": 2}, {"sum_logits": -1.7416778802871704, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.7416778802871704, "logits_per_char": -0.8708389401435852, "bits_per_byte": 1.2563550203590539, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 2, "native_id": "Mercury_SC_408547", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4692208766937256, "logits_per_token_corr": -1.4692208766937256, "logits_per_char_corr": -0.7346104383468628, "bits_per_byte_corr": 1.059818836389048}, "model_output": [{"sum_logits": -1.2206284999847412, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.2206284999847412, "logits_per_char": -0.6103142499923706, "bits_per_byte": 0.8804973418484927, "num_chars": 2}, {"sum_logits": -1.1305902004241943, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.1305902004241943, "logits_per_char": -0.5652951002120972, "bits_per_byte": 0.8155484377154081, "num_chars": 2}, {"sum_logits": -1.4692208766937256, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4692208766937256, "logits_per_char": -0.7346104383468628, "bits_per_byte": 1.059818836389048, "num_chars": 2}, {"sum_logits": -1.953235387802124, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.953235387802124, "logits_per_char": -0.976617693901062, "bits_per_byte": 1.40896150383649, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 3, "native_id": "Mercury_407327", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5430738925933838, "logits_per_token_corr": -1.5430738925933838, "logits_per_char_corr": -0.7715369462966919, "bits_per_byte_corr": 1.1130925262856515}, "model_output": [{"sum_logits": -1.5482161045074463, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.5482161045074463, "logits_per_char": -0.7741080522537231, "bits_per_byte": 1.1168018480994633, "num_chars": 2}, {"sum_logits": -1.3447134494781494, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.3447134494781494, "logits_per_char": -0.6723567247390747, "bits_per_byte": 0.9700057124901058, "num_chars": 2}, {"sum_logits": -1.2374002933502197, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": true, "logits_per_token": -1.2374002933502197, "logits_per_char": -0.6187001466751099, "bits_per_byte": 0.8925956334060966, "num_chars": 2}, {"sum_logits": -1.5430738925933838, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.5430738925933838, "logits_per_char": -0.7715369462966919, "bits_per_byte": 1.1130925262856515, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 4, "native_id": "MCAS_2006_9_44", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.035372734069824, "logits_per_token_corr": -2.035372734069824, "logits_per_char_corr": -1.017686367034912, "bits_per_byte_corr": 1.468211074902628}, "model_output": [{"sum_logits": -1.2080183029174805, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": false, "logits_per_token": -1.2080183029174805, "logits_per_char": -0.6040091514587402, "bits_per_byte": 0.8714010074617013, "num_chars": 2}, {"sum_logits": -1.1626653671264648, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": true, "logits_per_token": -1.1626653671264648, "logits_per_char": -0.5813326835632324, "bits_per_byte": 0.838685779683951, "num_chars": 2}, {"sum_logits": -1.4392976760864258, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": false, "logits_per_token": -1.4392976760864258, "logits_per_char": -0.7196488380432129, "bits_per_byte": 1.038233809827194, "num_chars": 2}, {"sum_logits": -2.035372734069824, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": false, "logits_per_token": -2.035372734069824, "logits_per_char": -1.017686367034912, "bits_per_byte": 1.468211074902628, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 5, "native_id": "Mercury_7270393", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0677350759506226, "logits_per_token_corr": -1.0677350759506226, "logits_per_char_corr": -0.5338675379753113, "bits_per_byte_corr": 0.7702080495291352}, "model_output": [{"sum_logits": -1.5710119009017944, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.5710119009017944, "logits_per_char": -0.7855059504508972, "bits_per_byte": 1.1332455393050953, "num_chars": 2}, {"sum_logits": -1.0677350759506226, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": true, "logits_per_token": -1.0677350759506226, "logits_per_char": -0.5338675379753113, "bits_per_byte": 0.7702080495291352, "num_chars": 2}, {"sum_logits": -1.301139235496521, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.301139235496521, "logits_per_char": -0.6505696177482605, "bits_per_byte": 0.9385735612791183, "num_chars": 2}, {"sum_logits": -1.8307698965072632, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.8307698965072632, "logits_per_char": -0.9153849482536316, "bits_per_byte": 1.3206213253508634, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 6, "native_id": "MCAS_2014_5_7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2066917419433594, "logits_per_token_corr": -1.2066917419433594, "logits_per_char_corr": -0.6033458709716797, "bits_per_byte_corr": 0.8704440959923}, "model_output": [{"sum_logits": -1.5011711120605469, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": false, "logits_per_token": -1.5011711120605469, "logits_per_char": -0.7505855560302734, "bits_per_byte": 1.0828660594485386, "num_chars": 2}, {"sum_logits": -1.1412029266357422, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": true, "logits_per_token": -1.1412029266357422, "logits_per_char": -0.5706014633178711, "bits_per_byte": 0.8232039014532698, "num_chars": 2}, {"sum_logits": -1.2066917419433594, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": false, "logits_per_token": -1.2066917419433594, "logits_per_char": -0.6033458709716797, "bits_per_byte": 0.8704440959923, "num_chars": 2}, {"sum_logits": -1.9416065216064453, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": false, "logits_per_token": -1.9416065216064453, "logits_per_char": -0.9708032608032227, "bits_per_byte": 1.4005730500406506, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 7, "native_id": "Mercury_7086660", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5107228755950928, "logits_per_token_corr": -1.5107228755950928, "logits_per_char_corr": -0.7553614377975464, "bits_per_byte_corr": 1.0897562003900603}, "model_output": [{"sum_logits": -1.272794485092163, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.272794485092163, "logits_per_char": -0.6363972425460815, "bits_per_byte": 0.9181271458573023, "num_chars": 2}, {"sum_logits": -1.011749029159546, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.011749029159546, "logits_per_char": -0.505874514579773, "bits_per_byte": 0.7298226534968744, "num_chars": 2}, {"sum_logits": -1.5107228755950928, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.5107228755950928, "logits_per_char": -0.7553614377975464, "bits_per_byte": 1.0897562003900603, "num_chars": 2}, {"sum_logits": -2.0799825191497803, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -2.0799825191497803, "logits_per_char": -1.0399912595748901, "bits_per_byte": 1.5003902327576386, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 8, "native_id": "Mercury_7168805", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0180821418762207, "logits_per_token_corr": -1.0180821418762207, "logits_per_char_corr": -0.5090410709381104, "bits_per_byte_corr": 0.7343910286517464}, "model_output": [{"sum_logits": -1.5714097023010254, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.5714097023010254, "logits_per_char": -0.7857048511505127, "bits_per_byte": 1.13353249235806, "num_chars": 2}, {"sum_logits": -1.0180821418762207, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": true, "logits_per_token": -1.0180821418762207, "logits_per_char": -0.5090410709381104, "bits_per_byte": 0.7343910286517464, "num_chars": 2}, {"sum_logits": -1.371056079864502, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.371056079864502, "logits_per_char": -0.685528039932251, "bits_per_byte": 0.9890079036013003, "num_chars": 2}, {"sum_logits": -1.7870163917541504, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.7870163917541504, "logits_per_char": -0.8935081958770752, "bits_per_byte": 1.289059843186427, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 9, "native_id": "MCAS_2003_8_11", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0763790607452393, "logits_per_token_corr": -1.0763790607452393, "logits_per_char_corr": -0.5381895303726196, "bits_per_byte_corr": 0.7764433665274963}, "model_output": [{"sum_logits": -1.0763790607452393, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.0763790607452393, "logits_per_char": -0.5381895303726196, "bits_per_byte": 0.7764433665274963, "num_chars": 2}, {"sum_logits": -1.238135576248169, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.238135576248169, "logits_per_char": -0.6190677881240845, "bits_per_byte": 0.8931260279013579, "num_chars": 2}, {"sum_logits": -1.5945179462432861, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.5945179462432861, "logits_per_char": -0.7972589731216431, "bits_per_byte": 1.1502015668276482, "num_chars": 2}, {"sum_logits": -2.033665895462036, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -2.033665895462036, "logits_per_char": -1.016832947731018, "bits_per_byte": 1.4669798511051002, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 10, "native_id": "Mercury_7250058", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2968069314956665, "logits_per_token_corr": -1.2968069314956665, "logits_per_char_corr": -0.6484034657478333, "bits_per_byte_corr": 0.935448464530288}, "model_output": [{"sum_logits": -1.3575903177261353, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3575903177261353, "logits_per_char": -0.6787951588630676, "bits_per_byte": 0.9792944094718873, "num_chars": 2}, {"sum_logits": -1.2968069314956665, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.2968069314956665, "logits_per_char": -0.6484034657478333, "bits_per_byte": 0.935448464530288, "num_chars": 2}, {"sum_logits": -1.4753881692886353, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4753881692886353, "logits_per_char": -0.7376940846443176, "bits_per_byte": 1.064267597610245, "num_chars": 2}, {"sum_logits": -1.5543371438980103, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.5543371438980103, "logits_per_char": -0.7771685719490051, "bits_per_byte": 1.1212172446863928, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 11, "native_id": "Mercury_7012740", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.379731297492981, "logits_per_token_corr": -1.379731297492981, "logits_per_char_corr": -0.6898656487464905, "bits_per_byte_corr": 0.9952657503269244}, "model_output": [{"sum_logits": -1.379731297492981, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.379731297492981, "logits_per_char": -0.6898656487464905, "bits_per_byte": 0.9952657503269244, "num_chars": 2}, {"sum_logits": -1.0445948839187622, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.0445948839187622, "logits_per_char": -0.5222974419593811, "bits_per_byte": 0.7535159293843317, "num_chars": 2}, {"sum_logits": -1.348939061164856, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.348939061164856, "logits_per_char": -0.674469530582428, "bits_per_byte": 0.9730538470026749, "num_chars": 2}, {"sum_logits": -2.0527563095092773, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -2.0527563095092773, "logits_per_char": -1.0263781547546387, "bits_per_byte": 1.4807506739423462, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 12, "native_id": "Mercury_LBS10610", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2613918781280518, "logits_per_token_corr": -1.2613918781280518, "logits_per_char_corr": -0.6306959390640259, "bits_per_byte_corr": 0.9099019035971317}, "model_output": [{"sum_logits": -1.5240375995635986, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.5240375995635986, "logits_per_char": -0.7620187997817993, "bits_per_byte": 1.0993607435101516, "num_chars": 2}, {"sum_logits": -1.0726521015167236, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.0726521015167236, "logits_per_char": -0.5363260507583618, "bits_per_byte": 0.773754933729207, "num_chars": 2}, {"sum_logits": -1.2613918781280518, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.2613918781280518, "logits_per_char": -0.6306959390640259, "bits_per_byte": 0.9099019035971317, "num_chars": 2}, {"sum_logits": -2.0592567920684814, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -2.0592567920684814, "logits_per_char": -1.0296283960342407, "bits_per_byte": 1.485439780918124, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 13, "native_id": "Mercury_SC_407400", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3464634418487549, "logits_per_token_corr": -1.3464634418487549, "logits_per_char_corr": -0.6732317209243774, "bits_per_byte_corr": 0.9712680651474397}, "model_output": [{"sum_logits": -1.3264248371124268, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3264248371124268, "logits_per_char": -0.6632124185562134, "bits_per_byte": 0.956813267307712, "num_chars": 2}, {"sum_logits": -1.1505939960479736, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.1505939960479736, "logits_per_char": -0.5752969980239868, "bits_per_byte": 0.8299781260881098, "num_chars": 2}, {"sum_logits": -1.3464634418487549, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3464634418487549, "logits_per_char": -0.6732317209243774, "bits_per_byte": 0.9712680651474397, "num_chars": 2}, {"sum_logits": -1.9414188861846924, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.9414188861846924, "logits_per_char": -0.9707094430923462, "bits_per_byte": 1.4004376996944214, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 14, "native_id": "Mercury_7212993", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.8510223627090454, "logits_per_token_corr": -0.8510223627090454, "logits_per_char_corr": -0.4255111813545227, "bits_per_byte_corr": 0.6138828711834153}, "model_output": [{"sum_logits": -2.029712200164795, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -2.029712200164795, "logits_per_char": -1.0148561000823975, "bits_per_byte": 1.4641278628058403, "num_chars": 2}, {"sum_logits": -1.5158611536026, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5158611536026, "logits_per_char": -0.7579305768013, "bits_per_byte": 1.0934626844901327, "num_chars": 2}, {"sum_logits": -0.8510223627090454, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -0.8510223627090454, "logits_per_char": -0.4255111813545227, "bits_per_byte": 0.6138828711834153, "num_chars": 2}, {"sum_logits": -1.5905624628067017, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5905624628067017, "logits_per_char": -0.7952812314033508, "bits_per_byte": 1.1473482886585067, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 15, "native_id": "Mercury_SC_413240", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6258890628814697, "logits_per_token_corr": -1.6258890628814697, "logits_per_char_corr": -0.8129445314407349, "bits_per_byte_corr": 1.1728310440281928}, "model_output": [{"sum_logits": -1.6258890628814697, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.6258890628814697, "logits_per_char": -0.8129445314407349, "bits_per_byte": 1.1728310440281928, "num_chars": 2}, {"sum_logits": -1.3004801273345947, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3004801273345947, "logits_per_char": -0.6502400636672974, "bits_per_byte": 0.9380981152408077, "num_chars": 2}, {"sum_logits": -1.0953924655914307, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.0953924655914307, "logits_per_char": -0.5476962327957153, "bits_per_byte": 0.7901586389685135, "num_chars": 2}, {"sum_logits": -1.7809526920318604, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.7809526920318604, "logits_per_char": -0.8904763460159302, "bits_per_byte": 1.28468580842703, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 16, "native_id": "Mercury_7186358", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4228262901306152, "logits_per_token_corr": -1.4228262901306152, "logits_per_char_corr": -0.7114131450653076, "bits_per_byte_corr": 1.0263522164096774}, "model_output": [{"sum_logits": -1.508643627166748, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.508643627166748, "logits_per_char": -0.754321813583374, "bits_per_byte": 1.0882563396918847, "num_chars": 2}, {"sum_logits": -1.1349921226501465, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": true, "logits_per_token": -1.1349921226501465, "logits_per_char": -0.5674960613250732, "bits_per_byte": 0.8187237533982904, "num_chars": 2}, {"sum_logits": -1.4228262901306152, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.4228262901306152, "logits_per_char": -0.7114131450653076, "bits_per_byte": 1.0263522164096774, "num_chars": 2}, {"sum_logits": -1.6064562797546387, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.6064562797546387, "logits_per_char": -0.8032281398773193, "bits_per_byte": 1.1588132541043077, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 17, "native_id": "Mercury_7166425", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2760069370269775, "logits_per_token_corr": -1.2760069370269775, "logits_per_char_corr": -0.6380034685134888, "bits_per_byte_corr": 0.9204444400950295}, "model_output": [{"sum_logits": -1.4369146823883057, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4369146823883057, "logits_per_char": -0.7184573411941528, "bits_per_byte": 1.036514843231819, "num_chars": 2}, {"sum_logits": -1.2760069370269775, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.2760069370269775, "logits_per_char": -0.6380034685134888, "bits_per_byte": 0.9204444400950295, "num_chars": 2}, {"sum_logits": -1.2507860660552979, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.2507860660552979, "logits_per_char": -0.6253930330276489, "bits_per_byte": 0.902251427356145, "num_chars": 2}, {"sum_logits": -1.6909983158111572, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.6909983158111572, "logits_per_char": -0.8454991579055786, "bits_per_byte": 1.2197974421870492, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 18, "native_id": "MDSA_2007_8_3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4068968296051025, "logits_per_token_corr": -1.4068968296051025, "logits_per_char_corr": -0.7034484148025513, "bits_per_byte_corr": 1.0148615395575724}, "model_output": [{"sum_logits": -1.4068968296051025, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.4068968296051025, "logits_per_char": -0.7034484148025513, "bits_per_byte": 1.0148615395575724, "num_chars": 2}, {"sum_logits": -1.430389642715454, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.430389642715454, "logits_per_char": -0.715194821357727, "bits_per_byte": 1.0318080220430021, "num_chars": 2}, {"sum_logits": -1.3272478580474854, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": true, "logits_per_token": -1.3272478580474854, "logits_per_char": -0.6636239290237427, "bits_per_byte": 0.9574069514184909, "num_chars": 2}, {"sum_logits": -1.4837572574615479, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.4837572574615479, "logits_per_char": -0.7418786287307739, "bits_per_byte": 1.070304618612161, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 19, "native_id": "Mercury_7094290", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4113062620162964, "logits_per_token_corr": -1.4113062620162964, "logits_per_char_corr": -0.7056531310081482, "bits_per_byte_corr": 1.0180422726939569}, "model_output": [{"sum_logits": -1.350589632987976, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.350589632987976, "logits_per_char": -0.675294816493988, "bits_per_byte": 0.9742444828945991, "num_chars": 2}, {"sum_logits": -1.2189825773239136, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.2189825773239136, "logits_per_char": -0.6094912886619568, "bits_per_byte": 0.8793100596182605, "num_chars": 2}, {"sum_logits": -1.4113062620162964, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4113062620162964, "logits_per_char": -0.7056531310081482, "bits_per_byte": 1.0180422726939569, "num_chars": 2}, {"sum_logits": -1.682906985282898, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.682906985282898, "logits_per_char": -0.841453492641449, "bits_per_byte": 1.2139607809733886, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 20, "native_id": "Mercury_7186568", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2704261541366577, "logits_per_token_corr": -1.2704261541366577, "logits_per_char_corr": -0.6352130770683289, "bits_per_byte_corr": 0.9164187561949554}, "model_output": [{"sum_logits": -1.361090064048767, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.361090064048767, "logits_per_char": -0.6805450320243835, "bits_per_byte": 0.9818189428039042, "num_chars": 2}, {"sum_logits": -1.2704261541366577, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.2704261541366577, "logits_per_char": -0.6352130770683289, "bits_per_byte": 0.9164187561949554, "num_chars": 2}, {"sum_logits": -1.3374980688095093, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3374980688095093, "logits_per_char": -0.6687490344047546, "bits_per_byte": 0.9648009155357155, "num_chars": 2}, {"sum_logits": -1.6604312658309937, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.6604312658309937, "logits_per_char": -0.8302156329154968, "bits_per_byte": 1.1977479764765397, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 21, "native_id": "Mercury_402216", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2427785396575928, "logits_per_token_corr": -1.2427785396575928, "logits_per_char_corr": -0.6213892698287964, "bits_per_byte_corr": 0.8964752180442626}, "model_output": [{"sum_logits": -1.6140477657318115, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": false, "logits_per_token": -1.6140477657318115, "logits_per_char": -0.8070238828659058, "bits_per_byte": 1.1642893536904344, "num_chars": 2}, {"sum_logits": -1.2427785396575928, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": true, "logits_per_token": -1.2427785396575928, "logits_per_char": -0.6213892698287964, "bits_per_byte": 0.8964752180442626, "num_chars": 2}, {"sum_logits": -1.2528302669525146, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": false, "logits_per_token": -1.2528302669525146, "logits_per_char": -0.6264151334762573, "bits_per_byte": 0.9037260066046438, "num_chars": 2}, {"sum_logits": -1.563842535018921, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": false, "logits_per_token": -1.563842535018921, "logits_per_char": -0.7819212675094604, "bits_per_byte": 1.1280739350023217, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 22, "native_id": "Mercury_404894", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.193649172782898, "logits_per_token_corr": -1.193649172782898, "logits_per_char_corr": -0.596824586391449, "bits_per_byte_corr": 0.8610358710681687}, "model_output": [{"sum_logits": -1.193649172782898, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.193649172782898, "logits_per_char": -0.596824586391449, "bits_per_byte": 0.8610358710681687, "num_chars": 2}, {"sum_logits": -1.1280401945114136, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.1280401945114136, "logits_per_char": -0.5640200972557068, "bits_per_byte": 0.8137089972731036, "num_chars": 2}, {"sum_logits": -1.4335898160934448, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4335898160934448, "logits_per_char": -0.7167949080467224, "bits_per_byte": 1.0341164591742098, "num_chars": 2}, {"sum_logits": -2.070587158203125, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -2.070587158203125, "logits_per_char": -1.0352935791015625, "bits_per_byte": 1.493612912435083, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 23, "native_id": "MCAS_2002_8_11", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.347468376159668, "logits_per_token_corr": -1.347468376159668, "logits_per_char_corr": -0.673734188079834, "bits_per_byte_corr": 0.971992972020827}, "model_output": [{"sum_logits": -1.6244745254516602, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.6244745254516602, "logits_per_char": -0.8122372627258301, "bits_per_byte": 1.171810670960623, "num_chars": 2}, {"sum_logits": -1.185023307800293, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": true, "logits_per_token": -1.185023307800293, "logits_per_char": -0.5925116539001465, "bits_per_byte": 0.8548136247512733, "num_chars": 2}, {"sum_logits": -1.347468376159668, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.347468376159668, "logits_per_char": -0.673734188079834, "bits_per_byte": 0.971992972020827, "num_chars": 2}, {"sum_logits": -1.8136014938354492, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.8136014938354492, "logits_per_char": -0.9068007469177246, "bits_per_byte": 1.3082369406535492, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 24, "native_id": "Mercury_SC_405086", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4072223901748657, "logits_per_token_corr": -1.4072223901748657, "logits_per_char_corr": -0.7036111950874329, "bits_per_byte_corr": 1.0150963818673255}, "model_output": [{"sum_logits": -1.3071647882461548, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3071647882461548, "logits_per_char": -0.6535823941230774, "bits_per_byte": 0.9429200788143771, "num_chars": 2}, {"sum_logits": -1.4072223901748657, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4072223901748657, "logits_per_char": -0.7036111950874329, "bits_per_byte": 1.0150963818673255, "num_chars": 2}, {"sum_logits": -1.2543178796768188, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.2543178796768188, "logits_per_char": -0.6271589398384094, "bits_per_byte": 0.904799092354703, "num_chars": 2}, {"sum_logits": -1.822837471961975, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.822837471961975, "logits_per_char": -0.9114187359809875, "bits_per_byte": 1.314899290574003, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 25, "native_id": "Mercury_SC_408324", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7253772020339966, "logits_per_token_corr": -1.7253772020339966, "logits_per_char_corr": -0.8626886010169983, "bits_per_byte_corr": 1.244596566519555}, "model_output": [{"sum_logits": -1.5363398790359497, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.5363398790359497, "logits_per_char": -0.7681699395179749, "bits_per_byte": 1.1082349623033534, "num_chars": 2}, {"sum_logits": -1.0680228471755981, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.0680228471755981, "logits_per_char": -0.5340114235877991, "bits_per_byte": 0.7704156325887268, "num_chars": 2}, {"sum_logits": -1.385407567024231, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.385407567024231, "logits_per_char": -0.6927037835121155, "bits_per_byte": 0.999360313278669, "num_chars": 2}, {"sum_logits": -1.7253772020339966, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.7253772020339966, "logits_per_char": -0.8626886010169983, "bits_per_byte": 1.244596566519555, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 26, "native_id": "Mercury_7218820", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.915658712387085, "logits_per_token_corr": -0.915658712387085, "logits_per_char_corr": -0.4578293561935425, "bits_per_byte_corr": 0.6605081417542851}, "model_output": [{"sum_logits": -1.4030110836029053, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.4030110836029053, "logits_per_char": -0.7015055418014526, "bits_per_byte": 1.0120585663138082, "num_chars": 2}, {"sum_logits": -0.915658712387085, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": true, "logits_per_token": -0.915658712387085, "logits_per_char": -0.4578293561935425, "bits_per_byte": 0.6605081417542851, "num_chars": 2}, {"sum_logits": -1.367704153060913, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.367704153060913, "logits_per_char": -0.6838520765304565, "bits_per_byte": 0.9865899995128183, "num_chars": 2}, {"sum_logits": -2.4144227504730225, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -2.4144227504730225, "logits_per_char": -1.2072113752365112, "bits_per_byte": 1.7416378643597115, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 27, "native_id": "Mercury_412202", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0612049102783203, "logits_per_token_corr": -1.0612049102783203, "logits_per_char_corr": -0.5306024551391602, "bits_per_byte_corr": 0.765497530713325}, "model_output": [{"sum_logits": -1.6019668579101562, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.6019668579101562, "logits_per_char": -0.8009834289550781, "bits_per_byte": 1.1555748207885588, "num_chars": 2}, {"sum_logits": -1.0612049102783203, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": true, "logits_per_token": -1.0612049102783203, "logits_per_char": -0.5306024551391602, "bits_per_byte": 0.765497530713325, "num_chars": 2}, {"sum_logits": -1.3013362884521484, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.3013362884521484, "logits_per_char": -0.6506681442260742, "bits_per_byte": 0.9387157049400565, "num_chars": 2}, {"sum_logits": -1.8336009979248047, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.8336009979248047, "logits_per_char": -0.9168004989624023, "bits_per_byte": 1.3226635333385353, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 28, "native_id": "Mercury_SC_409139", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4316601753234863, "logits_per_token_corr": -1.4316601753234863, "logits_per_char_corr": -0.7158300876617432, "bits_per_byte_corr": 1.0327245175894508}, "model_output": [{"sum_logits": -1.2190327644348145, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.2190327644348145, "logits_per_char": -0.6095163822174072, "bits_per_byte": 0.8793462619662671, "num_chars": 2}, {"sum_logits": -1.1190400123596191, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.1190400123596191, "logits_per_char": -0.5595200061798096, "bits_per_byte": 0.8072167381943534, "num_chars": 2}, {"sum_logits": -1.4316601753234863, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4316601753234863, "logits_per_char": -0.7158300876617432, "bits_per_byte": 1.0327245175894508, "num_chars": 2}, {"sum_logits": -2.0462422370910645, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -2.0462422370910645, "logits_per_char": -1.0231211185455322, "bits_per_byte": 1.476051763955469, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 29, "native_id": "Mercury_400687", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2028871774673462, "logits_per_token_corr": -1.2028871774673462, "logits_per_char_corr": -0.6014435887336731, "bits_per_byte_corr": 0.8676996828411548}, "model_output": [{"sum_logits": -1.2858911752700806, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.2858911752700806, "logits_per_char": -0.6429455876350403, "bits_per_byte": 0.9275744108431795, "num_chars": 2}, {"sum_logits": -1.2028871774673462, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.2028871774673462, "logits_per_char": -0.6014435887336731, "bits_per_byte": 0.8676996828411548, "num_chars": 2}, {"sum_logits": -1.335091233253479, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.335091233253479, "logits_per_char": -0.6675456166267395, "bits_per_byte": 0.9630647506752541, "num_chars": 2}, {"sum_logits": -1.917976975440979, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.917976975440979, "logits_per_char": -0.9589884877204895, "bits_per_byte": 1.3835279355049508, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 30, "native_id": "Mercury_7171605", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9643499851226807, "logits_per_token_corr": -1.9643499851226807, "logits_per_char_corr": -0.9821749925613403, "bits_per_byte_corr": 1.4169789910544182}, "model_output": [{"sum_logits": -1.3812906742095947, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.3812906742095947, "logits_per_char": -0.6906453371047974, "bits_per_byte": 0.9963906028548933, "num_chars": 2}, {"sum_logits": -1.1276209354400635, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.1276209354400635, "logits_per_char": -0.5638104677200317, "bits_per_byte": 0.8134065657815611, "num_chars": 2}, {"sum_logits": -1.2957384586334229, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.2957384586334229, "logits_per_char": -0.6478692293167114, "bits_per_byte": 0.9346777242804457, "num_chars": 2}, {"sum_logits": -1.9643499851226807, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.9643499851226807, "logits_per_char": -0.9821749925613403, "bits_per_byte": 1.4169789910544182, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 31, "native_id": "Mercury_7210245", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4250328540802002, "logits_per_token_corr": -1.4250328540802002, "logits_per_char_corr": -0.7125164270401001, "bits_per_byte_corr": 1.0279439158434138}, "model_output": [{"sum_logits": -1.2587826251983643, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.2587826251983643, "logits_per_char": -0.6293913125991821, "bits_per_byte": 0.9080197254660878, "num_chars": 2}, {"sum_logits": -1.1643974781036377, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.1643974781036377, "logits_per_char": -0.5821987390518188, "bits_per_byte": 0.8399352336424701, "num_chars": 2}, {"sum_logits": -1.4250328540802002, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4250328540802002, "logits_per_char": -0.7125164270401001, "bits_per_byte": 1.0279439158434138, "num_chars": 2}, {"sum_logits": -1.9110558032989502, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.9110558032989502, "logits_per_char": -0.9555279016494751, "bits_per_byte": 1.3785353651417254, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 32, "native_id": "AKDE&ED_2008_4_25", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2257524728775024, "logits_per_token_corr": -1.2257524728775024, "logits_per_char_corr": -0.6128762364387512, "bits_per_byte_corr": 0.8841935069895135}, "model_output": [{"sum_logits": -1.2257524728775024, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.2257524728775024, "logits_per_char": -0.6128762364387512, "bits_per_byte": 0.8841935069895135, "num_chars": 2}, {"sum_logits": -1.1476656198501587, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.1476656198501587, "logits_per_char": -0.5738328099250793, "bits_per_byte": 0.8278657491788859, "num_chars": 2}, {"sum_logits": -1.3895078897476196, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3895078897476196, "logits_per_char": -0.6947539448738098, "bits_per_byte": 1.0023180709082098, "num_chars": 2}, {"sum_logits": -2.0709500312805176, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -2.0709500312805176, "logits_per_char": -1.0354750156402588, "bits_per_byte": 1.4938746700296965, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 33, "native_id": "AKDE&ED_2008_4_19", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5723652839660645, "logits_per_token_corr": -1.5723652839660645, "logits_per_char_corr": -0.7861826419830322, "bits_per_byte_corr": 1.1342217988227188}, "model_output": [{"sum_logits": -1.1711945533752441, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.1711945533752441, "logits_per_char": -0.5855972766876221, "bits_per_byte": 0.8448382870359215, "num_chars": 2}, {"sum_logits": -1.0183606147766113, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.0183606147766113, "logits_per_char": -0.5091803073883057, "bits_per_byte": 0.7345919043879544, "num_chars": 2}, {"sum_logits": -1.5723652839660645, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.5723652839660645, "logits_per_char": -0.7861826419830322, "bits_per_byte": 1.1342217988227188, "num_chars": 2}, {"sum_logits": -2.1780648231506348, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -2.1780648231506348, "logits_per_char": -1.0890324115753174, "bits_per_byte": 1.5711416595481877, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 34, "native_id": "Mercury_SC_400402", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4186346530914307, "logits_per_token_corr": -1.4186346530914307, "logits_per_char_corr": -0.7093173265457153, "bits_per_byte_corr": 1.0233285894248563}, "model_output": [{"sum_logits": -1.4186346530914307, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4186346530914307, "logits_per_char": -0.7093173265457153, "bits_per_byte": 1.0233285894248563, "num_chars": 2}, {"sum_logits": -1.2655298709869385, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.2655298709869385, "logits_per_char": -0.6327649354934692, "bits_per_byte": 0.9128868344855088, "num_chars": 2}, {"sum_logits": -1.3191378116607666, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3191378116607666, "logits_per_char": -0.6595689058303833, "bits_per_byte": 0.9515567895667373, "num_chars": 2}, {"sum_logits": -1.682741403579712, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.682741403579712, "logits_per_char": -0.841370701789856, "bits_per_byte": 1.2138413390223641, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 35, "native_id": "Mercury_7234308", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5654263496398926, "logits_per_token_corr": -1.5654263496398926, "logits_per_char_corr": -0.7827131748199463, "bits_per_byte_corr": 1.1292164157520042}, "model_output": [{"sum_logits": -1.5654263496398926, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.5654263496398926, "logits_per_char": -0.7827131748199463, "bits_per_byte": 1.1292164157520042, "num_chars": 2}, {"sum_logits": -0.9067749381065369, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": true, "logits_per_token": -0.9067749381065369, "logits_per_char": -0.45338746905326843, "bits_per_byte": 0.6540998532048187, "num_chars": 2}, {"sum_logits": -1.5266900062561035, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.5266900062561035, "logits_per_char": -0.7633450031280518, "bits_per_byte": 1.1012740505010017, "num_chars": 2}, {"sum_logits": -1.8777375221252441, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.8777375221252441, "logits_per_char": -0.9388687610626221, "bits_per_byte": 1.354501305631583, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 36, "native_id": "ACTAAP_2014_5_8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0422307252883911, "logits_per_token_corr": -1.0422307252883911, "logits_per_char_corr": -0.5211153626441956, "bits_per_byte_corr": 0.7518105494183749}, "model_output": [{"sum_logits": -1.4858895540237427, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.4858895540237427, "logits_per_char": -0.7429447770118713, "bits_per_byte": 1.0718427454501536, "num_chars": 2}, {"sum_logits": -1.0422307252883911, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": true, "logits_per_token": -1.0422307252883911, "logits_per_char": -0.5211153626441956, "bits_per_byte": 0.7518105494183749, "num_chars": 2}, {"sum_logits": -1.2429238557815552, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.2429238557815552, "logits_per_char": -0.6214619278907776, "bits_per_byte": 0.8965800414699635, "num_chars": 2}, {"sum_logits": -2.0992202758789062, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -2.0992202758789062, "logits_per_char": -1.0496101379394531, "bits_per_byte": 1.5142673408731178, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 37, "native_id": "Mercury_400407", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0358083248138428, "logits_per_token_corr": -1.0358083248138428, "logits_per_char_corr": -0.5179041624069214, "bits_per_byte_corr": 0.7471777667607546}, "model_output": [{"sum_logits": -1.318622350692749, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.318622350692749, "logits_per_char": -0.6593111753463745, "bits_per_byte": 0.9511849630755717, "num_chars": 2}, {"sum_logits": -1.0358083248138428, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.0358083248138428, "logits_per_char": -0.5179041624069214, "bits_per_byte": 0.7471777667607546, "num_chars": 2}, {"sum_logits": -1.3859517574310303, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3859517574310303, "logits_per_char": -0.6929758787155151, "bits_per_byte": 0.9997528636792637, "num_chars": 2}, {"sum_logits": -2.1426427364349365, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -2.1426427364349365, "logits_per_char": -1.0713213682174683, "bits_per_byte": 1.545590025126831, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 38, "native_id": "Mercury_7116288", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4414291381835938, "logits_per_token_corr": -1.4414291381835938, "logits_per_char_corr": -0.7207145690917969, "bits_per_byte_corr": 1.0397713347259085}, "model_output": [{"sum_logits": -1.1848983764648438, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.1848983764648438, "logits_per_char": -0.5924491882324219, "bits_per_byte": 0.8547235058422211, "num_chars": 2}, {"sum_logits": -1.1823291778564453, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.1823291778564453, "logits_per_char": -0.5911645889282227, "bits_per_byte": 0.8528702207965222, "num_chars": 2}, {"sum_logits": -1.4414291381835938, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4414291381835938, "logits_per_char": -0.7207145690917969, "bits_per_byte": 1.0397713347259085, "num_chars": 2}, {"sum_logits": -1.9451980590820312, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.9451980590820312, "logits_per_char": -0.9725990295410156, "bits_per_byte": 1.4031637966932498, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 39, "native_id": "MCAS_2004_9_15-v1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1196904182434082, "logits_per_token_corr": -1.1196904182434082, "logits_per_char_corr": -0.5598452091217041, "bits_per_byte_corr": 0.8076859068659075}, "model_output": [{"sum_logits": -1.3864426612854004, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.3864426612854004, "logits_per_char": -0.6932213306427002, "bits_per_byte": 1.0001069759573904, "num_chars": 2}, {"sum_logits": -1.1196904182434082, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": true, "logits_per_token": -1.1196904182434082, "logits_per_char": -0.5598452091217041, "bits_per_byte": 0.8076859068659075, "num_chars": 2}, {"sum_logits": -1.4136204719543457, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.4136204719543457, "logits_per_char": -0.7068102359771729, "bits_per_byte": 1.019711622294558, "num_chars": 2}, {"sum_logits": -1.7890782356262207, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.7890782356262207, "logits_per_char": -0.8945391178131104, "bits_per_byte": 1.2905471491510896, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 40, "native_id": "NYSEDREGENTS_2015_4_26", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2822089195251465, "logits_per_token_corr": -1.2822089195251465, "logits_per_char_corr": -0.6411044597625732, "bits_per_byte_corr": 0.9249182247919269}, "model_output": [{"sum_logits": -1.4533476829528809, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4533476829528809, "logits_per_char": -0.7266738414764404, "bits_per_byte": 1.0483687474425465, "num_chars": 2}, {"sum_logits": -1.1460871696472168, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.1460871696472168, "logits_per_char": -0.5730435848236084, "bits_per_byte": 0.8267271380388478, "num_chars": 2}, {"sum_logits": -1.2822089195251465, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.2822089195251465, "logits_per_char": -0.6411044597625732, "bits_per_byte": 0.9249182247919269, "num_chars": 2}, {"sum_logits": -1.8441405296325684, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.8441405296325684, "logits_per_char": -0.9220702648162842, "bits_per_byte": 1.330266198402582, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 41, "native_id": "Mercury_SC_401620", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3555171489715576, "logits_per_token_corr": -1.3555171489715576, "logits_per_char_corr": -0.6777585744857788, "bits_per_byte_corr": 0.9777989343313087}, "model_output": [{"sum_logits": -1.3555171489715576, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3555171489715576, "logits_per_char": -0.6777585744857788, "bits_per_byte": 0.9777989343313087, "num_chars": 2}, {"sum_logits": -1.118828535079956, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.118828535079956, "logits_per_char": -0.559414267539978, "bits_per_byte": 0.8070641895830379, "num_chars": 2}, {"sum_logits": -1.3257911205291748, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3257911205291748, "logits_per_char": -0.6628955602645874, "bits_per_byte": 0.9563561374217183, "num_chars": 2}, {"sum_logits": -1.9991424083709717, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.9991424083709717, "logits_per_char": -0.9995712041854858, "bits_per_byte": 1.4420764192948459, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 42, "native_id": "Mercury_400877", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4737001657485962, "logits_per_token_corr": -1.4737001657485962, "logits_per_char_corr": -0.7368500828742981, "bits_per_byte_corr": 1.0630499604421353}, "model_output": [{"sum_logits": -1.4282385110855103, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.4282385110855103, "logits_per_char": -0.7141192555427551, "bits_per_byte": 1.0302563085755914, "num_chars": 2}, {"sum_logits": -1.5189214944839478, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.5189214944839478, "logits_per_char": -0.7594607472419739, "bits_per_byte": 1.0956702537966094, "num_chars": 2}, {"sum_logits": -1.4737001657485962, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4737001657485962, "logits_per_char": -0.7368500828742981, "bits_per_byte": 1.0630499604421353, "num_chars": 2}, {"sum_logits": -1.757067322731018, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.757067322731018, "logits_per_char": -0.878533661365509, "bits_per_byte": 1.2674561565069544, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 43, "native_id": "Mercury_7174213", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9598143100738525, "logits_per_token_corr": -1.9598143100738525, "logits_per_char_corr": -0.9799071550369263, "bits_per_byte_corr": 1.4137071931044018}, "model_output": [{"sum_logits": -1.2482740879058838, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.2482740879058838, "logits_per_char": -0.6241370439529419, "bits_per_byte": 0.9004394181466532, "num_chars": 2}, {"sum_logits": -1.3960039615631104, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.3960039615631104, "logits_per_char": -0.6980019807815552, "bits_per_byte": 1.0070039962049466, "num_chars": 2}, {"sum_logits": -1.170595407485962, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.170595407485962, "logits_per_char": -0.585297703742981, "bits_per_byte": 0.8444060946343029, "num_chars": 2}, {"sum_logits": -1.9598143100738525, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.9598143100738525, "logits_per_char": -0.9799071550369263, "bits_per_byte": 1.4137071931044018, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 44, "native_id": "NYSEDREGENTS_2008_8_34", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.227620005607605, "logits_per_token_corr": -1.227620005607605, "logits_per_char_corr": -0.6138100028038025, "bits_per_byte_corr": 0.8855406470937228}, "model_output": [{"sum_logits": -1.2471226453781128, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.2471226453781128, "logits_per_char": -0.6235613226890564, "bits_per_byte": 0.8996088279343106, "num_chars": 2}, {"sum_logits": -1.227620005607605, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": true, "logits_per_token": -1.227620005607605, "logits_per_char": -0.6138100028038025, "bits_per_byte": 0.8855406470937228, "num_chars": 2}, {"sum_logits": -1.397154688835144, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.397154688835144, "logits_per_char": -0.698577344417572, "bits_per_byte": 1.0078340704693367, "num_chars": 2}, {"sum_logits": -1.8461273908615112, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.8461273908615112, "logits_per_char": -0.9230636954307556, "bits_per_byte": 1.3316994158235484, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 45, "native_id": "Mercury_7212398", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2686760425567627, "logits_per_token_corr": -1.2686760425567627, "logits_per_char_corr": -0.6343380212783813, "bits_per_byte_corr": 0.915156317546296}, "model_output": [{"sum_logits": -1.0264675617218018, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.0264675617218018, "logits_per_char": -0.5132337808609009, "bits_per_byte": 0.7404398304652466, "num_chars": 2}, {"sum_logits": -1.2686760425567627, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.2686760425567627, "logits_per_char": -0.6343380212783813, "bits_per_byte": 0.915156317546296, "num_chars": 2}, {"sum_logits": -1.6825649738311768, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.6825649738311768, "logits_per_char": -0.8412824869155884, "bits_per_byte": 1.2137140718607256, "num_chars": 2}, {"sum_logits": -1.851701021194458, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.851701021194458, "logits_per_char": -0.925850510597229, "bits_per_byte": 1.3357199402440967, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 46, "native_id": "Mercury_SC_401290", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4568570852279663, "logits_per_token_corr": -1.4568570852279663, "logits_per_char_corr": -0.7284285426139832, "bits_per_byte_corr": 1.0509002460719234}, "model_output": [{"sum_logits": -1.315177321434021, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.315177321434021, "logits_per_char": -0.6575886607170105, "bits_per_byte": 0.9486998997619277, "num_chars": 2}, {"sum_logits": -1.2594674825668335, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.2594674825668335, "logits_per_char": -0.6297337412834167, "bits_per_byte": 0.9085137456306915, "num_chars": 2}, {"sum_logits": -1.4568570852279663, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4568570852279663, "logits_per_char": -0.7284285426139832, "bits_per_byte": 1.0509002460719234, "num_chars": 2}, {"sum_logits": -1.6149591207504272, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.6149591207504272, "logits_per_char": -0.8074795603752136, "bits_per_byte": 1.1649467573733578, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 47, "native_id": "Mercury_SC_402120", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4642391204833984, "logits_per_token_corr": -1.4642391204833984, "logits_per_char_corr": -0.7321195602416992, "bits_per_byte_corr": 1.0562252588992669}, "model_output": [{"sum_logits": -1.3260040283203125, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3260040283203125, "logits_per_char": -0.6630020141601562, "bits_per_byte": 0.9565097179289389, "num_chars": 2}, {"sum_logits": -1.2281837463378906, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.2281837463378906, "logits_per_char": -0.6140918731689453, "bits_per_byte": 0.8859473000716882, "num_chars": 2}, {"sum_logits": -1.4642391204833984, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4642391204833984, "logits_per_char": -0.7321195602416992, "bits_per_byte": 1.0562252588992669, "num_chars": 2}, {"sum_logits": -1.6358089447021484, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.6358089447021484, "logits_per_char": -0.8179044723510742, "bits_per_byte": 1.1799867261826469, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 48, "native_id": "Mercury_184975", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3101767301559448, "logits_per_token_corr": -1.3101767301559448, "logits_per_char_corr": -0.6550883650779724, "bits_per_byte_corr": 0.9450927356427286}, "model_output": [{"sum_logits": -1.2786577939987183, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.2786577939987183, "logits_per_char": -0.6393288969993591, "bits_per_byte": 0.9223566291986489, "num_chars": 2}, {"sum_logits": -1.472305178642273, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.472305178642273, "logits_per_char": -0.7361525893211365, "bits_per_byte": 1.0620436899519363, "num_chars": 2}, {"sum_logits": -1.3101767301559448, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3101767301559448, "logits_per_char": -0.6550883650779724, "bits_per_byte": 0.9450927356427286, "num_chars": 2}, {"sum_logits": -1.6111239194869995, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.6111239194869995, "logits_per_char": -0.8055619597434998, "bits_per_byte": 1.1621802444515767, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 49, "native_id": "Mercury_SC_400578", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.179693579673767, "logits_per_token_corr": -1.179693579673767, "logits_per_char_corr": -0.5898467898368835, "bits_per_byte_corr": 0.8509690385825579}, "model_output": [{"sum_logits": -1.179693579673767, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.179693579673767, "logits_per_char": -0.5898467898368835, "bits_per_byte": 0.8509690385825579, "num_chars": 2}, {"sum_logits": -1.062963843345642, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.062963843345642, "logits_per_char": -0.531481921672821, "bits_per_byte": 0.7667663327200662, "num_chars": 2}, {"sum_logits": -1.4639438390731812, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4639438390731812, "logits_per_char": -0.7319719195365906, "bits_per_byte": 1.0560122583861733, "num_chars": 2}, {"sum_logits": -2.2279114723205566, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -2.2279114723205566, "logits_per_char": -1.1139557361602783, "bits_per_byte": 1.6070984163294026, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 50, "native_id": "MCAS_2001_8_4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6320828199386597, "logits_per_token_corr": -1.6320828199386597, "logits_per_char_corr": -0.8160414099693298, "bits_per_byte_corr": 1.1772988953236354}, "model_output": [{"sum_logits": -1.2441662549972534, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": true, "logits_per_token": -1.2441662549972534, "logits_per_char": -0.6220831274986267, "bits_per_byte": 0.8974762430636103, "num_chars": 2}, {"sum_logits": -1.5186470746994019, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.5186470746994019, "logits_per_char": -0.7593235373497009, "bits_per_byte": 1.0954723017654662, "num_chars": 2}, {"sum_logits": -1.5276840925216675, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.5276840925216675, "logits_per_char": -0.7638420462608337, "bits_per_byte": 1.1019911321637748, "num_chars": 2}, {"sum_logits": -1.6320828199386597, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.6320828199386597, "logits_per_char": -0.8160414099693298, "bits_per_byte": 1.1772988953236354, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 51, "native_id": "MCAS_2003_5_33", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2886781692504883, "logits_per_token_corr": -1.2886781692504883, "logits_per_char_corr": -0.6443390846252441, "bits_per_byte_corr": 0.9295848020404417}, "model_output": [{"sum_logits": -1.4974374771118164, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4974374771118164, "logits_per_char": -0.7487187385559082, "bits_per_byte": 1.0801728111360251, "num_chars": 2}, {"sum_logits": -1.2806997299194336, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.2806997299194336, "logits_per_char": -0.6403498649597168, "bits_per_byte": 0.9238295746119646, "num_chars": 2}, {"sum_logits": -1.2886781692504883, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.2886781692504883, "logits_per_char": -0.6443390846252441, "bits_per_byte": 0.9295848020404417, "num_chars": 2}, {"sum_logits": -1.5717134475708008, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.5717134475708008, "logits_per_char": -0.7858567237854004, "bits_per_byte": 1.1337515982552595, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 52, "native_id": "Mercury_7068513", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.544609785079956, "logits_per_token_corr": -1.544609785079956, "logits_per_char_corr": -0.772304892539978, "bits_per_byte_corr": 1.1142004385225106}, "model_output": [{"sum_logits": -1.544609785079956, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.544609785079956, "logits_per_char": -0.772304892539978, "bits_per_byte": 1.1142004385225106, "num_chars": 2}, {"sum_logits": -1.168522596359253, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.168522596359253, "logits_per_char": -0.5842612981796265, "bits_per_byte": 0.8429108774677007, "num_chars": 2}, {"sum_logits": -1.4040248394012451, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4040248394012451, "logits_per_char": -0.7020124197006226, "bits_per_byte": 1.0127898365452774, "num_chars": 2}, {"sum_logits": -1.5337626934051514, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5337626934051514, "logits_per_char": -0.7668813467025757, "bits_per_byte": 1.1063759158388506, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 53, "native_id": "AKDE&ED_2008_4_26", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5643229484558105, "logits_per_token_corr": -1.5643229484558105, "logits_per_char_corr": -0.7821614742279053, "bits_per_byte_corr": 1.1284204800438105}, "model_output": [{"sum_logits": -1.0683951377868652, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.0683951377868652, "logits_per_char": -0.5341975688934326, "bits_per_byte": 0.7706841834980493, "num_chars": 2}, {"sum_logits": -1.122779369354248, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.122779369354248, "logits_per_char": -0.561389684677124, "bits_per_byte": 0.8099141140904876, "num_chars": 2}, {"sum_logits": -1.5643229484558105, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.5643229484558105, "logits_per_char": -0.7821614742279053, "bits_per_byte": 1.1284204800438105, "num_chars": 2}, {"sum_logits": -2.1952872276306152, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -2.1952872276306152, "logits_per_char": -1.0976436138153076, "bits_per_byte": 1.5835649983159226, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 54, "native_id": "Mercury_7235638", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4855129718780518, "logits_per_token_corr": -1.4855129718780518, "logits_per_char_corr": -0.7427564859390259, "bits_per_byte_corr": 1.0715710988531155}, "model_output": [{"sum_logits": -1.5331737995147705, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.5331737995147705, "logits_per_char": -0.7665868997573853, "bits_per_byte": 1.1059511186912192, "num_chars": 2}, {"sum_logits": -1.036566972732544, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.036566972732544, "logits_per_char": -0.518283486366272, "bits_per_byte": 0.7477250155558004, "num_chars": 2}, {"sum_logits": -1.4855129718780518, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4855129718780518, "logits_per_char": -0.7427564859390259, "bits_per_byte": 1.0715710988531155, "num_chars": 2}, {"sum_logits": -1.7404568195343018, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.7404568195343018, "logits_per_char": -0.8702284097671509, "bits_per_byte": 1.2554742112126593, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 55, "native_id": "MDSA_2009_5_20", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.257819652557373, "logits_per_token_corr": -1.257819652557373, "logits_per_char_corr": -0.6289098262786865, "bits_per_byte_corr": 0.9073250875392523}, "model_output": [{"sum_logits": -0.9536890983581543, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": true, "logits_per_token": -0.9536890983581543, "logits_per_char": -0.47684454917907715, "bits_per_byte": 0.6879412663760822, "num_chars": 2}, {"sum_logits": -1.257819652557373, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": false, "logits_per_token": -1.257819652557373, "logits_per_char": -0.6289098262786865, "bits_per_byte": 0.9073250875392523, "num_chars": 2}, {"sum_logits": -1.578111171722412, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": false, "logits_per_token": -1.578111171722412, "logits_per_char": -0.789055585861206, "bits_per_byte": 1.1383665807085155, "num_chars": 2}, {"sum_logits": -2.1895976066589355, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": false, "logits_per_token": -2.1895976066589355, "logits_per_char": -1.0947988033294678, "bits_per_byte": 1.5794608043357294, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 56, "native_id": "Mercury_178325", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2066679000854492, "logits_per_token_corr": -1.2066679000854492, "logits_per_char_corr": -0.6033339500427246, "bits_per_byte_corr": 0.8704268977272137}, "model_output": [{"sum_logits": -1.1757478713989258, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.1757478713989258, "logits_per_char": -0.5878739356994629, "bits_per_byte": 0.8481228117021018, "num_chars": 2}, {"sum_logits": -1.3402929306030273, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3402929306030273, "logits_per_char": -0.6701464653015137, "bits_per_byte": 0.9668169821604562, "num_chars": 2}, {"sum_logits": -1.2066679000854492, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.2066679000854492, "logits_per_char": -0.6033339500427246, "bits_per_byte": 0.8704268977272137, "num_chars": 2}, {"sum_logits": -2.191342353820801, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -2.191342353820801, "logits_per_char": -1.0956711769104004, "bits_per_byte": 1.5807193733747444, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 57, "native_id": "Mercury_7212678", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3594086170196533, "logits_per_token_corr": -1.3594086170196533, "logits_per_char_corr": -0.6797043085098267, "bits_per_byte_corr": 0.9806060351586935}, "model_output": [{"sum_logits": -1.3594086170196533, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.3594086170196533, "logits_per_char": -0.6797043085098267, "bits_per_byte": 0.9806060351586935, "num_chars": 2}, {"sum_logits": -1.2011921405792236, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.2011921405792236, "logits_per_char": -0.6005960702896118, "bits_per_byte": 0.8664769721848448, "num_chars": 2}, {"sum_logits": -1.433436632156372, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.433436632156372, "logits_per_char": -0.716718316078186, "bits_per_byte": 1.0340059603210305, "num_chars": 2}, {"sum_logits": -1.691596269607544, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.691596269607544, "logits_per_char": -0.845798134803772, "bits_per_byte": 1.2202287746754135, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 58, "native_id": "TAKS_2009_8_32", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4281197786331177, "logits_per_token_corr": -1.4281197786331177, "logits_per_char_corr": -0.7140598893165588, "bits_per_byte_corr": 1.0301706612154617}, "model_output": [{"sum_logits": -1.4281197786331177, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4281197786331177, "logits_per_char": -0.7140598893165588, "bits_per_byte": 1.0301706612154617, "num_chars": 2}, {"sum_logits": -1.3507500886917114, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3507500886917114, "logits_per_char": -0.6753750443458557, "bits_per_byte": 0.9743602272186298, "num_chars": 2}, {"sum_logits": -1.2537709474563599, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.2537709474563599, "logits_per_char": -0.6268854737281799, "bits_per_byte": 0.9044045641536235, "num_chars": 2}, {"sum_logits": -1.6343880891799927, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.6343880891799927, "logits_per_char": -0.8171940445899963, "bits_per_byte": 1.178961795574829, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 59, "native_id": "Mercury_412681", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0765354633331299, "logits_per_token_corr": -1.0765354633331299, "logits_per_char_corr": -0.5382677316665649, "bits_per_byte_corr": 0.7765561871464624}, "model_output": [{"sum_logits": -1.744544267654419, "num_tokens": 1, "num_tokens_all": 446, "is_greedy": false, "logits_per_token": -1.744544267654419, "logits_per_char": -0.8722721338272095, "bits_per_byte": 1.2584226817790534, "num_chars": 2}, {"sum_logits": -1.3050243854522705, "num_tokens": 1, "num_tokens_all": 446, "is_greedy": false, "logits_per_token": -1.3050243854522705, "logits_per_char": -0.6525121927261353, "bits_per_byte": 0.9413761045662552, "num_chars": 2}, {"sum_logits": -1.0765354633331299, "num_tokens": 1, "num_tokens_all": 446, "is_greedy": true, "logits_per_token": -1.0765354633331299, "logits_per_char": -0.5382677316665649, "bits_per_byte": 0.7765561871464624, "num_chars": 2}, {"sum_logits": -1.6193840503692627, "num_tokens": 1, "num_tokens_all": 446, "is_greedy": false, "logits_per_token": -1.6193840503692627, "logits_per_char": -0.8096920251846313, "bits_per_byte": 1.1681386693820486, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 60, "native_id": "Mercury_400440", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.54293692111969, "logits_per_token_corr": -1.54293692111969, "logits_per_char_corr": -0.771468460559845, "bits_per_byte_corr": 1.1129937222527309}, "model_output": [{"sum_logits": -1.5583158731460571, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.5583158731460571, "logits_per_char": -0.7791579365730286, "bits_per_byte": 1.1240872911639934, "num_chars": 2}, {"sum_logits": -1.3044325113296509, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3044325113296509, "logits_per_char": -0.6522162556648254, "bits_per_byte": 0.9409491576354879, "num_chars": 2}, {"sum_logits": -1.2840372323989868, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2840372323989868, "logits_per_char": -0.6420186161994934, "bits_per_byte": 0.9262370737500693, "num_chars": 2}, {"sum_logits": -1.54293692111969, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.54293692111969, "logits_per_char": -0.771468460559845, "bits_per_byte": 1.1129937222527309, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 61, "native_id": "Mercury_SC_416529", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2331722974777222, "logits_per_token_corr": -1.2331722974777222, "logits_per_char_corr": -0.6165861487388611, "bits_per_byte_corr": 0.8895457790670187}, "model_output": [{"sum_logits": -1.3958977460861206, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3958977460861206, "logits_per_char": -0.6979488730430603, "bits_per_byte": 1.0069273779339871, "num_chars": 2}, {"sum_logits": -1.2331722974777222, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2331722974777222, "logits_per_char": -0.6165861487388611, "bits_per_byte": 0.8895457790670187, "num_chars": 2}, {"sum_logits": -1.257424235343933, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.257424235343933, "logits_per_char": -0.6287121176719666, "bits_per_byte": 0.9070398543127962, "num_chars": 2}, {"sum_logits": -1.798014521598816, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.798014521598816, "logits_per_char": -0.899007260799408, "bits_per_byte": 1.2969933168794086, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 62, "native_id": "MCAS_2006_8_1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8214870691299438, "logits_per_token_corr": -1.8214870691299438, "logits_per_char_corr": -0.9107435345649719, "bits_per_byte_corr": 1.3139251808395151}, "model_output": [{"sum_logits": -1.36260187625885, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.36260187625885, "logits_per_char": -0.681300938129425, "bits_per_byte": 0.9829094847930262, "num_chars": 2}, {"sum_logits": -1.1628128290176392, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.1628128290176392, "logits_per_char": -0.5814064145088196, "bits_per_byte": 0.8387921509535097, "num_chars": 2}, {"sum_logits": -1.3732887506484985, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3732887506484985, "logits_per_char": -0.6866443753242493, "bits_per_byte": 0.9906184351353062, "num_chars": 2}, {"sum_logits": -1.8214870691299438, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.8214870691299438, "logits_per_char": -0.9107435345649719, "bits_per_byte": 1.3139251808395151, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 63, "native_id": "TIMSS_2003_8_pg80", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2230725288391113, "logits_per_token_corr": -1.2230725288391113, "logits_per_char_corr": -0.6115362644195557, "bits_per_byte_corr": 0.8822603360024887}, "model_output": [{"sum_logits": -1.6555256843566895, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.6555256843566895, "logits_per_char": -0.8277628421783447, "bits_per_byte": 1.1942093474437097, "num_chars": 2}, {"sum_logits": -1.3237614631652832, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3237614631652832, "logits_per_char": -0.6618807315826416, "bits_per_byte": 0.9548920491149222, "num_chars": 2}, {"sum_logits": -1.2230725288391113, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.2230725288391113, "logits_per_char": -0.6115362644195557, "bits_per_byte": 0.8822603360024887, "num_chars": 2}, {"sum_logits": -1.4865155220031738, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4865155220031738, "logits_per_char": -0.7432577610015869, "bits_per_byte": 1.0722942858999942, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 64, "native_id": "Mercury_416645", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7970707416534424, "logits_per_token_corr": -1.7970707416534424, "logits_per_char_corr": -0.8985353708267212, "bits_per_byte_corr": 1.2963125235559678}, "model_output": [{"sum_logits": -1.2771008014678955, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.2771008014678955, "logits_per_char": -0.6385504007339478, "bits_per_byte": 0.9212334964971886, "num_chars": 2}, {"sum_logits": -1.0490200519561768, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": true, "logits_per_token": -1.0490200519561768, "logits_per_char": -0.5245100259780884, "bits_per_byte": 0.7567080133756732, "num_chars": 2}, {"sum_logits": -1.6367638111114502, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.6367638111114502, "logits_per_char": -0.8183819055557251, "bits_per_byte": 1.1806755166993528, "num_chars": 2}, {"sum_logits": -1.7970707416534424, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.7970707416534424, "logits_per_char": -0.8985353708267212, "bits_per_byte": 1.2963125235559678, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 65, "native_id": "Mercury_406777", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1605852842330933, "logits_per_token_corr": -1.1605852842330933, "logits_per_char_corr": -0.5802926421165466, "bits_per_byte_corr": 0.8371853170464973}, "model_output": [{"sum_logits": -1.1605852842330933, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.1605852842330933, "logits_per_char": -0.5802926421165466, "bits_per_byte": 0.8371853170464973, "num_chars": 2}, {"sum_logits": -1.1845968961715698, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.1845968961715698, "logits_per_char": -0.5922984480857849, "bits_per_byte": 0.854506033780205, "num_chars": 2}, {"sum_logits": -1.4157084226608276, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4157084226608276, "logits_per_char": -0.7078542113304138, "bits_per_byte": 1.02121776035949, "num_chars": 2}, {"sum_logits": -2.0488052368164062, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -2.0488052368164062, "logits_per_char": -1.0244026184082031, "bits_per_byte": 1.4779005774522456, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 66, "native_id": "Mercury_LBS11018", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6018387079238892, "logits_per_token_corr": -1.6018387079238892, "logits_per_char_corr": -0.8009193539619446, "bits_per_byte_corr": 1.15548238011372}, "model_output": [{"sum_logits": -1.6018387079238892, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.6018387079238892, "logits_per_char": -0.8009193539619446, "bits_per_byte": 1.15548238011372, "num_chars": 2}, {"sum_logits": -1.1720367670059204, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.1720367670059204, "logits_per_char": -0.5860183835029602, "bits_per_byte": 0.8454458157500948, "num_chars": 2}, {"sum_logits": -1.3291107416152954, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.3291107416152954, "logits_per_char": -0.6645553708076477, "bits_per_byte": 0.9587507378610084, "num_chars": 2}, {"sum_logits": -2.153510570526123, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -2.153510570526123, "logits_per_char": -1.0767552852630615, "bits_per_byte": 1.553429510301116, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 67, "native_id": "Mercury_7139878", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1820566654205322, "logits_per_token_corr": -1.1820566654205322, "logits_per_char_corr": -0.5910283327102661, "bits_per_byte_corr": 0.8526736446265858}, "model_output": [{"sum_logits": -1.4875280857086182, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4875280857086182, "logits_per_char": -0.7437640428543091, "bits_per_byte": 1.073024696218209, "num_chars": 2}, {"sum_logits": -1.2687208652496338, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.2687208652496338, "logits_per_char": -0.6343604326248169, "bits_per_byte": 0.9151886502846582, "num_chars": 2}, {"sum_logits": -1.1820566654205322, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.1820566654205322, "logits_per_char": -0.5910283327102661, "bits_per_byte": 0.8526736446265858, "num_chars": 2}, {"sum_logits": -1.7610437870025635, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.7610437870025635, "logits_per_char": -0.8805218935012817, "bits_per_byte": 1.2703245691493719, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 68, "native_id": "Mercury_417147", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1332345008850098, "logits_per_token_corr": -1.1332345008850098, "logits_per_char_corr": -0.5666172504425049, "bits_per_byte_corr": 0.8174558972961289}, "model_output": [{"sum_logits": -1.3373074531555176, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3373074531555176, "logits_per_char": -0.6686537265777588, "bits_per_byte": 0.9646634154063505, "num_chars": 2}, {"sum_logits": -1.1332345008850098, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.1332345008850098, "logits_per_char": -0.5666172504425049, "bits_per_byte": 0.8174558972961289, "num_chars": 2}, {"sum_logits": -1.4440903663635254, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4440903663635254, "logits_per_char": -0.7220451831817627, "bits_per_byte": 1.0416910050748407, "num_chars": 2}, {"sum_logits": -1.8815178871154785, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.8815178871154785, "logits_per_char": -0.9407589435577393, "bits_per_byte": 1.3572282625436658, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 69, "native_id": "Mercury_7016765", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2326617240905762, "logits_per_token_corr": -1.2326617240905762, "logits_per_char_corr": -0.6163308620452881, "bits_per_byte_corr": 0.8891774782201958}, "model_output": [{"sum_logits": -1.2326617240905762, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.2326617240905762, "logits_per_char": -0.6163308620452881, "bits_per_byte": 0.8891774782201958, "num_chars": 2}, {"sum_logits": -0.9667238593101501, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -0.9667238593101501, "logits_per_char": -0.4833619296550751, "bits_per_byte": 0.6973438588683978, "num_chars": 2}, {"sum_logits": -1.5421690940856934, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.5421690940856934, "logits_per_char": -0.7710845470428467, "bits_per_byte": 1.1124398521256267, "num_chars": 2}, {"sum_logits": -2.2638068199157715, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -2.2638068199157715, "logits_per_char": -1.1319034099578857, "bits_per_byte": 1.6329914363127223, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 70, "native_id": "Mercury_415303", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4708592891693115, "logits_per_token_corr": -1.4708592891693115, "logits_per_char_corr": -0.7354296445846558, "bits_per_byte_corr": 1.061000701165778}, "model_output": [{"sum_logits": -1.4708592891693115, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.4708592891693115, "logits_per_char": -0.7354296445846558, "bits_per_byte": 1.061000701165778, "num_chars": 2}, {"sum_logits": -1.6938340663909912, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.6938340663909912, "logits_per_char": -0.8469170331954956, "bits_per_byte": 1.221843003836413, "num_chars": 2}, {"sum_logits": -1.6416795253753662, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.6416795253753662, "logits_per_char": -0.8208397626876831, "bits_per_byte": 1.1842214549948449, "num_chars": 2}, {"sum_logits": -2.0467641353607178, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -2.0467641353607178, "logits_per_char": -1.0233820676803589, "bits_per_byte": 1.476428233978208, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 71, "native_id": "Mercury_7215845", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.470520257949829, "logits_per_token_corr": -1.470520257949829, "logits_per_char_corr": -0.7352601289749146, "bits_per_byte_corr": 1.060756141836251}, "model_output": [{"sum_logits": -1.1922667026519775, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.1922667026519775, "logits_per_char": -0.5961333513259888, "bits_per_byte": 0.8600386296671401, "num_chars": 2}, {"sum_logits": -1.22951340675354, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.22951340675354, "logits_per_char": -0.61475670337677, "bits_per_byte": 0.8869064473155508, "num_chars": 2}, {"sum_logits": -1.470520257949829, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.470520257949829, "logits_per_char": -0.7352601289749146, "bits_per_byte": 1.060756141836251, "num_chars": 2}, {"sum_logits": -1.8283751010894775, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.8283751010894775, "logits_per_char": -0.9141875505447388, "bits_per_byte": 1.3188938456142707, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 72, "native_id": "Mercury_7136885", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3642826080322266, "logits_per_token_corr": -1.3642826080322266, "logits_per_char_corr": -0.6821413040161133, "bits_per_byte_corr": 0.9841218764902844}, "model_output": [{"sum_logits": -1.4199180603027344, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4199180603027344, "logits_per_char": -0.7099590301513672, "bits_per_byte": 1.0242543720344512, "num_chars": 2}, {"sum_logits": -1.3642826080322266, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.3642826080322266, "logits_per_char": -0.6821413040161133, "bits_per_byte": 0.9841218764902844, "num_chars": 2}, {"sum_logits": -1.203561782836914, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.203561782836914, "logits_per_char": -0.601780891418457, "bits_per_byte": 0.8681863077517714, "num_chars": 2}, {"sum_logits": -1.7359981536865234, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.7359981536865234, "logits_per_char": -0.8679990768432617, "bits_per_byte": 1.2522579636588715, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 73, "native_id": "Mercury_SC_400059", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8405976295471191, "logits_per_token_corr": -1.8405976295471191, "logits_per_char_corr": -0.9202988147735596, "bits_per_byte_corr": 1.3277105362107589}, "model_output": [{"sum_logits": -1.1018834114074707, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.1018834114074707, "logits_per_char": -0.5509417057037354, "bits_per_byte": 0.7948408666382567, "num_chars": 2}, {"sum_logits": -1.2831387519836426, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.2831387519836426, "logits_per_char": -0.6415693759918213, "bits_per_byte": 0.9255889571302923, "num_chars": 2}, {"sum_logits": -1.5180726051330566, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.5180726051330566, "logits_per_char": -0.7590363025665283, "bits_per_byte": 1.0950579095682118, "num_chars": 2}, {"sum_logits": -1.8405976295471191, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.8405976295471191, "logits_per_char": -0.9202988147735596, "bits_per_byte": 1.3277105362107589, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 74, "native_id": "Mercury_7044328", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7540968656539917, "logits_per_token_corr": -1.7540968656539917, "logits_per_char_corr": -0.8770484328269958, "bits_per_byte_corr": 1.2653134246598532}, "model_output": [{"sum_logits": -1.4239462614059448, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4239462614059448, "logits_per_char": -0.7119731307029724, "bits_per_byte": 1.027160104912106, "num_chars": 2}, {"sum_logits": -1.2767390012741089, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.2767390012741089, "logits_per_char": -0.6383695006370544, "bits_per_byte": 0.9209725128245041, "num_chars": 2}, {"sum_logits": -1.2198771238327026, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.2198771238327026, "logits_per_char": -0.6099385619163513, "bits_per_byte": 0.8799553385242982, "num_chars": 2}, {"sum_logits": -1.7540968656539917, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.7540968656539917, "logits_per_char": -0.8770484328269958, "bits_per_byte": 1.2653134246598532, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 75, "native_id": "MEA_2010_8_1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7038694620132446, "logits_per_token_corr": -1.7038694620132446, "logits_per_char_corr": -0.8519347310066223, "bits_per_byte_corr": 1.2290820115852101}, "model_output": [{"sum_logits": -1.7038694620132446, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.7038694620132446, "logits_per_char": -0.8519347310066223, "bits_per_byte": 1.2290820115852101, "num_chars": 2}, {"sum_logits": -1.2312933206558228, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.2312933206558228, "logits_per_char": -0.6156466603279114, "bits_per_byte": 0.888190383795568, "num_chars": 2}, {"sum_logits": -1.1089140176773071, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.1089140176773071, "logits_per_char": -0.5544570088386536, "bits_per_byte": 0.7999123770382284, "num_chars": 2}, {"sum_logits": -1.7029215097427368, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.7029215097427368, "logits_per_char": -0.8514607548713684, "bits_per_byte": 1.228398208565379, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 76, "native_id": "Mercury_414099", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0290355682373047, "logits_per_token_corr": -1.0290355682373047, "logits_per_char_corr": -0.5145177841186523, "bits_per_byte_corr": 0.7422922555976913}, "model_output": [{"sum_logits": -1.3899898529052734, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.3899898529052734, "logits_per_char": -0.6949949264526367, "bits_per_byte": 1.0026657338369291, "num_chars": 2}, {"sum_logits": -1.0290355682373047, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": true, "logits_per_token": -1.0290355682373047, "logits_per_char": -0.5145177841186523, "bits_per_byte": 0.7422922555976913, "num_chars": 2}, {"sum_logits": -1.4472675323486328, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.4472675323486328, "logits_per_char": -0.7236337661743164, "bits_per_byte": 1.04398284588024, "num_chars": 2}, {"sum_logits": -1.9301795959472656, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.9301795959472656, "logits_per_char": -0.9650897979736328, "bits_per_byte": 1.392330265550092, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 77, "native_id": "Mercury_410807", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1162474155426025, "logits_per_token_corr": -1.1162474155426025, "logits_per_char_corr": -0.5581237077713013, "bits_per_byte_corr": 0.8052023054047959}, "model_output": [{"sum_logits": -1.4160196781158447, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.4160196781158447, "logits_per_char": -0.7080098390579224, "bits_per_byte": 1.0214422837101915, "num_chars": 2}, {"sum_logits": -1.1162474155426025, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.1162474155426025, "logits_per_char": -0.5581237077713013, "bits_per_byte": 0.8052023054047959, "num_chars": 2}, {"sum_logits": -1.2579424381256104, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.2579424381256104, "logits_per_char": -0.6289712190628052, "bits_per_byte": 0.9074136586044468, "num_chars": 2}, {"sum_logits": -1.978649377822876, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.978649377822876, "logits_per_char": -0.989324688911438, "bits_per_byte": 1.4272938225225735, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 78, "native_id": "Mercury_403234", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1758180856704712, "logits_per_token_corr": -1.1758180856704712, "logits_per_char_corr": -0.5879090428352356, "bits_per_byte_corr": 0.8481734605927809}, "model_output": [{"sum_logits": -1.0520883798599243, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": true, "logits_per_token": -1.0520883798599243, "logits_per_char": -0.5260441899299622, "bits_per_byte": 0.7589213441009536, "num_chars": 2}, {"sum_logits": -1.1758180856704712, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.1758180856704712, "logits_per_char": -0.5879090428352356, "bits_per_byte": 0.8481734605927809, "num_chars": 2}, {"sum_logits": -1.6474443674087524, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.6474443674087524, "logits_per_char": -0.8237221837043762, "bits_per_byte": 1.188379909501385, "num_chars": 2}, {"sum_logits": -2.047325611114502, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -2.047325611114502, "logits_per_char": -1.023662805557251, "bits_per_byte": 1.4768332531209902, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 79, "native_id": "Mercury_7011323", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3421026468276978, "logits_per_token_corr": -1.3421026468276978, "logits_per_char_corr": -0.6710513234138489, "bits_per_byte_corr": 0.9681224164718313}, "model_output": [{"sum_logits": -1.3421026468276978, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.3421026468276978, "logits_per_char": -0.6710513234138489, "bits_per_byte": 0.9681224164718313, "num_chars": 2}, {"sum_logits": -1.3758512735366821, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3758512735366821, "logits_per_char": -0.6879256367683411, "bits_per_byte": 0.992466904666781, "num_chars": 2}, {"sum_logits": -1.3772302865982056, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3772302865982056, "logits_per_char": -0.6886151432991028, "bits_per_byte": 0.9934616523193722, "num_chars": 2}, {"sum_logits": -1.5203901529312134, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5203901529312134, "logits_per_char": -0.7601950764656067, "bits_per_byte": 1.096729666925925, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 80, "native_id": "Mercury_7109463", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4015238285064697, "logits_per_token_corr": -1.4015238285064697, "logits_per_char_corr": -0.7007619142532349, "bits_per_byte_corr": 1.0109857385377252}, "model_output": [{"sum_logits": -1.57857346534729, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.57857346534729, "logits_per_char": -0.789286732673645, "bits_per_byte": 1.1387000550685387, "num_chars": 2}, {"sum_logits": -1.3714048862457275, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.3714048862457275, "logits_per_char": -0.6857024431228638, "bits_per_byte": 0.9892595142195127, "num_chars": 2}, {"sum_logits": -1.2798941135406494, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.2798941135406494, "logits_per_char": -0.6399470567703247, "bits_per_byte": 0.9232484452346987, "num_chars": 2}, {"sum_logits": -1.4015238285064697, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4015238285064697, "logits_per_char": -0.7007619142532349, "bits_per_byte": 1.0109857385377252, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 81, "native_id": "Mercury_SC_401277", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4547079801559448, "logits_per_token_corr": -1.4547079801559448, "logits_per_char_corr": -0.7273539900779724, "bits_per_byte_corr": 1.049349994457045}, "model_output": [{"sum_logits": -1.263431429862976, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.263431429862976, "logits_per_char": -0.631715714931488, "bits_per_byte": 0.9113731291839386, "num_chars": 2}, {"sum_logits": -1.1806334257125854, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.1806334257125854, "logits_per_char": -0.5903167128562927, "bits_per_byte": 0.8516469941922595, "num_chars": 2}, {"sum_logits": -1.4547079801559448, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4547079801559448, "logits_per_char": -0.7273539900779724, "bits_per_byte": 1.049349994457045, "num_chars": 2}, {"sum_logits": -1.8388441801071167, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.8388441801071167, "logits_per_char": -0.9194220900535583, "bits_per_byte": 1.3264456898049874, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 82, "native_id": "MCAS_2005_5_25", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4222456216812134, "logits_per_token_corr": -1.4222456216812134, "logits_per_char_corr": -0.7111228108406067, "bits_per_byte_corr": 1.0259333526635008}, "model_output": [{"sum_logits": -1.175972580909729, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.175972580909729, "logits_per_char": -0.5879862904548645, "bits_per_byte": 0.8482849053505401, "num_chars": 2}, {"sum_logits": -1.2273622751235962, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.2273622751235962, "logits_per_char": -0.6136811375617981, "bits_per_byte": 0.8853547338481399, "num_chars": 2}, {"sum_logits": -1.4222456216812134, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4222456216812134, "logits_per_char": -0.7111228108406067, "bits_per_byte": 1.0259333526635008, "num_chars": 2}, {"sum_logits": -2.0005135536193848, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -2.0005135536193848, "logits_per_char": -1.0002567768096924, "bits_per_byte": 1.4430654915199586, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 83, "native_id": "Mercury_SC_401272", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6759008169174194, "logits_per_token_corr": -1.6759008169174194, "logits_per_char_corr": -0.8379504084587097, "bits_per_byte_corr": 1.2089068987951304}, "model_output": [{"sum_logits": -1.4381002187728882, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4381002187728882, "logits_per_char": -0.7190501093864441, "bits_per_byte": 1.0373700269632349, "num_chars": 2}, {"sum_logits": -1.0981725454330444, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.0981725454330444, "logits_per_char": -0.5490862727165222, "bits_per_byte": 0.7921640426689006, "num_chars": 2}, {"sum_logits": -1.4878381490707397, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4878381490707397, "logits_per_char": -0.7439190745353699, "bits_per_byte": 1.0732483596556563, "num_chars": 2}, {"sum_logits": -1.6759008169174194, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.6759008169174194, "logits_per_char": -0.8379504084587097, "bits_per_byte": 1.2089068987951304, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 84, "native_id": "Mercury_7103600", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.064239501953125, "logits_per_token_corr": -2.064239501953125, "logits_per_char_corr": -1.0321197509765625, "bits_per_byte_corr": 1.4890340463385083}, "model_output": [{"sum_logits": -1.1297245025634766, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": true, "logits_per_token": -1.1297245025634766, "logits_per_char": -0.5648622512817383, "bits_per_byte": 0.8149239687101248, "num_chars": 2}, {"sum_logits": -1.4673347473144531, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": false, "logits_per_token": -1.4673347473144531, "logits_per_char": -0.7336673736572266, "bits_per_byte": 1.0584582816380712, "num_chars": 2}, {"sum_logits": -1.2519817352294922, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": false, "logits_per_token": -1.2519817352294922, "logits_per_char": -0.6259908676147461, "bits_per_byte": 0.9031139203502226, "num_chars": 2}, {"sum_logits": -2.064239501953125, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": false, "logits_per_token": -2.064239501953125, "logits_per_char": -1.0321197509765625, "bits_per_byte": 1.4890340463385083, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 85, "native_id": "MDSA_2009_8_2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2141749858856201, "logits_per_token_corr": -1.2141749858856201, "logits_per_char_corr": -0.6070874929428101, "bits_per_byte_corr": 0.8758421154549348}, "model_output": [{"sum_logits": -1.2141749858856201, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.2141749858856201, "logits_per_char": -0.6070874929428101, "bits_per_byte": 0.8758421154549348, "num_chars": 2}, {"sum_logits": -1.3398005962371826, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3398005962371826, "logits_per_char": -0.6699002981185913, "bits_per_byte": 0.9664618379864242, "num_chars": 2}, {"sum_logits": -1.4557063579559326, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4557063579559326, "logits_per_char": -0.7278531789779663, "bits_per_byte": 1.0500701718075336, "num_chars": 2}, {"sum_logits": -1.7612788677215576, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.7612788677215576, "logits_per_char": -0.8806394338607788, "bits_per_byte": 1.2704941440431226, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 86, "native_id": "Mercury_7127943", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3731251955032349, "logits_per_token_corr": -1.3731251955032349, "logits_per_char_corr": -0.6865625977516174, "bits_per_byte_corr": 0.9905004550368143}, "model_output": [{"sum_logits": -1.3731251955032349, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": false, "logits_per_token": -1.3731251955032349, "logits_per_char": -0.6865625977516174, "bits_per_byte": 0.9905004550368143, "num_chars": 2}, {"sum_logits": -1.0669156312942505, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": true, "logits_per_token": -1.0669156312942505, "logits_per_char": -0.5334578156471252, "bits_per_byte": 0.7696169451581194, "num_chars": 2}, {"sum_logits": -1.5599805116653442, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": false, "logits_per_token": -1.5599805116653442, "logits_per_char": -0.7799902558326721, "bits_per_byte": 1.1252880740323183, "num_chars": 2}, {"sum_logits": -1.737005352973938, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": false, "logits_per_token": -1.737005352973938, "logits_per_char": -0.868502676486969, "bits_per_byte": 1.2529845043674421, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 87, "native_id": "ACTAAP_2009_7_8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.239337205886841, "logits_per_token_corr": -2.239337205886841, "logits_per_char_corr": -1.1196686029434204, "bits_per_byte_corr": 1.6153403409067069}, "model_output": [{"sum_logits": -1.2536957263946533, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.2536957263946533, "logits_per_char": -0.6268478631973267, "bits_per_byte": 0.9043503036272762, "num_chars": 2}, {"sum_logits": -1.0964252948760986, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": true, "logits_per_token": -1.0964252948760986, "logits_per_char": -0.5482126474380493, "bits_per_byte": 0.7909036678120517, "num_chars": 2}, {"sum_logits": -1.3201076984405518, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.3201076984405518, "logits_per_char": -0.6600538492202759, "bits_per_byte": 0.9522564149904478, "num_chars": 2}, {"sum_logits": -2.239337205886841, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -2.239337205886841, "logits_per_char": -1.1196686029434204, "bits_per_byte": 1.6153403409067069, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 88, "native_id": "MCAS_2006_9_43", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1483144760131836, "logits_per_token_corr": -1.1483144760131836, "logits_per_char_corr": -0.5741572380065918, "bits_per_byte_corr": 0.8283337999632093}, "model_output": [{"sum_logits": -1.8397245407104492, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.8397245407104492, "logits_per_char": -0.9198622703552246, "bits_per_byte": 1.327080735743299, "num_chars": 2}, {"sum_logits": -1.4109735488891602, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.4109735488891602, "logits_per_char": -0.7054867744445801, "bits_per_byte": 1.0178022709046777, "num_chars": 2}, {"sum_logits": -1.1483144760131836, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.1483144760131836, "logits_per_char": -0.5741572380065918, "bits_per_byte": 0.8283337999632093, "num_chars": 2}, {"sum_logits": -1.3468332290649414, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.3468332290649414, "logits_per_char": -0.6734166145324707, "bits_per_byte": 0.9715348102389281, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 89, "native_id": "Mercury_7252088", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3886398077011108, "logits_per_token_corr": -1.3886398077011108, "logits_per_char_corr": -0.6943199038505554, "bits_per_byte_corr": 1.0016918820764178}, "model_output": [{"sum_logits": -1.3436702489852905, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3436702489852905, "logits_per_char": -0.6718351244926453, "bits_per_byte": 0.969253202401255, "num_chars": 2}, {"sum_logits": -1.3292945623397827, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.3292945623397827, "logits_per_char": -0.6646472811698914, "bits_per_byte": 0.9588833364848237, "num_chars": 2}, {"sum_logits": -1.3886398077011108, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3886398077011108, "logits_per_char": -0.6943199038505554, "bits_per_byte": 1.0016918820764178, "num_chars": 2}, {"sum_logits": -1.566686987876892, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.566686987876892, "logits_per_char": -0.783343493938446, "bits_per_byte": 1.1301257740184418, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 90, "native_id": "Mercury_7084665", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3969671726226807, "logits_per_token_corr": -1.3969671726226807, "logits_per_char_corr": -0.6984835863113403, "bits_per_byte_corr": 1.007698806114433}, "model_output": [{"sum_logits": -1.3969671726226807, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3969671726226807, "logits_per_char": -0.6984835863113403, "bits_per_byte": 1.007698806114433, "num_chars": 2}, {"sum_logits": -0.9941675066947937, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -0.9941675066947937, "logits_per_char": -0.49708375334739685, "bits_per_byte": 0.7171402658612773, "num_chars": 2}, {"sum_logits": -1.48042893409729, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.48042893409729, "logits_per_char": -0.740214467048645, "bits_per_byte": 1.0679037408061145, "num_chars": 2}, {"sum_logits": -1.9676306247711182, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.9676306247711182, "logits_per_char": -0.9838153123855591, "bits_per_byte": 1.419345472330292, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 91, "native_id": "FCAT_2008_5_2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.0786726474761963, "logits_per_token_corr": -2.0786726474761963, "logits_per_char_corr": -1.0393363237380981, "bits_per_byte_corr": 1.4994453600737976}, "model_output": [{"sum_logits": -1.2859423160552979, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.2859423160552979, "logits_per_char": -0.6429711580276489, "bits_per_byte": 0.9276113011217896, "num_chars": 2}, {"sum_logits": -1.126516580581665, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.126516580581665, "logits_per_char": -0.5632582902908325, "bits_per_byte": 0.812609942142764, "num_chars": 2}, {"sum_logits": -1.3501322269439697, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.3501322269439697, "logits_per_char": -0.6750661134719849, "bits_per_byte": 0.9739145341789185, "num_chars": 2}, {"sum_logits": -2.0786726474761963, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -2.0786726474761963, "logits_per_char": -1.0393363237380981, "bits_per_byte": 1.4994453600737976, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 92, "native_id": "Mercury_SC_414041", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.1319689750671387, "logits_per_token_corr": -2.1319689750671387, "logits_per_char_corr": -1.0659844875335693, "bits_per_byte_corr": 1.5378905338303483}, "model_output": [{"sum_logits": -1.3276476860046387, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.3276476860046387, "logits_per_char": -0.6638238430023193, "bits_per_byte": 0.957695366323988, "num_chars": 2}, {"sum_logits": -0.9284605979919434, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -0.9284605979919434, "logits_per_char": -0.4642302989959717, "bits_per_byte": 0.6697427501923703, "num_chars": 2}, {"sum_logits": -1.5555205345153809, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.5555205345153809, "logits_per_char": -0.7777602672576904, "bits_per_byte": 1.122070880573951, "num_chars": 2}, {"sum_logits": -2.1319689750671387, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -2.1319689750671387, "logits_per_char": -1.0659844875335693, "bits_per_byte": 1.5378905338303483, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 93, "native_id": "MCAS_2014_8_20", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3396284580230713, "logits_per_token_corr": -1.3396284580230713, "logits_per_char_corr": -0.6698142290115356, "bits_per_byte_corr": 0.9663376665125012}, "model_output": [{"sum_logits": -1.3396284580230713, "num_tokens": 1, "num_tokens_all": 427, "is_greedy": false, "logits_per_token": -1.3396284580230713, "logits_per_char": -0.6698142290115356, "bits_per_byte": 0.9663376665125012, "num_chars": 2}, {"sum_logits": -1.2664988040924072, "num_tokens": 1, "num_tokens_all": 427, "is_greedy": true, "logits_per_token": -1.2664988040924072, "logits_per_char": -0.6332494020462036, "bits_per_byte": 0.9135857719786157, "num_chars": 2}, {"sum_logits": -1.320160150527954, "num_tokens": 1, "num_tokens_all": 427, "is_greedy": false, "logits_per_token": -1.320160150527954, "logits_per_char": -0.660080075263977, "bits_per_byte": 0.9522942511736375, "num_chars": 2}, {"sum_logits": -1.7528746128082275, "num_tokens": 1, "num_tokens_all": 427, "is_greedy": false, "logits_per_token": -1.7528746128082275, "logits_per_char": -0.8764373064041138, "bits_per_byte": 1.2644317556002043, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 94, "native_id": "Mercury_SC_401116", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0585765838623047, "logits_per_token_corr": -1.0585765838623047, "logits_per_char_corr": -0.5292882919311523, "bits_per_byte_corr": 0.763601593970212}, "model_output": [{"sum_logits": -1.3547744750976562, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3547744750976562, "logits_per_char": -0.6773872375488281, "bits_per_byte": 0.9772632083738707, "num_chars": 2}, {"sum_logits": -1.0585765838623047, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.0585765838623047, "logits_per_char": -0.5292882919311523, "bits_per_byte": 0.763601593970212, "num_chars": 2}, {"sum_logits": -1.3575096130371094, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3575096130371094, "logits_per_char": -0.6787548065185547, "bits_per_byte": 0.9792361933445702, "num_chars": 2}, {"sum_logits": -2.055950164794922, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -2.055950164794922, "logits_per_char": -1.027975082397461, "bits_per_byte": 1.483054553533306, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 95, "native_id": "Mercury_7064680", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5344898700714111, "logits_per_token_corr": -1.5344898700714111, "logits_per_char_corr": -0.7672449350357056, "bits_per_byte_corr": 1.1069004629239825}, "model_output": [{"sum_logits": -1.1135494709014893, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.1135494709014893, "logits_per_char": -0.5567747354507446, "bits_per_byte": 0.8032561497276309, "num_chars": 2}, {"sum_logits": -1.2264149188995361, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.2264149188995361, "logits_per_char": -0.6132074594497681, "bits_per_byte": 0.8846713607849361, "num_chars": 2}, {"sum_logits": -1.5344898700714111, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5344898700714111, "logits_per_char": -0.7672449350357056, "bits_per_byte": 1.1069004629239825, "num_chars": 2}, {"sum_logits": -1.974402666091919, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.974402666091919, "logits_per_char": -0.9872013330459595, "bits_per_byte": 1.424230467545403, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 96, "native_id": "Mercury_7211680", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2316018342971802, "logits_per_token_corr": -1.2316018342971802, "logits_per_char_corr": -0.6158009171485901, "bits_per_byte_corr": 0.8884129293457846}, "model_output": [{"sum_logits": -1.3480130434036255, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.3480130434036255, "logits_per_char": -0.6740065217018127, "bits_per_byte": 0.9723858663867233, "num_chars": 2}, {"sum_logits": -1.2621155977249146, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.2621155977249146, "logits_per_char": -0.6310577988624573, "bits_per_byte": 0.9104239569338262, "num_chars": 2}, {"sum_logits": -1.2316018342971802, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": true, "logits_per_token": -1.2316018342971802, "logits_per_char": -0.6158009171485901, "bits_per_byte": 0.8884129293457846, "num_chars": 2}, {"sum_logits": -1.8793832063674927, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.8793832063674927, "logits_per_char": -0.9396916031837463, "bits_per_byte": 1.3556884158791644, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 97, "native_id": "Mercury_180373", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.9981837868690491, "logits_per_token_corr": -0.9981837868690491, "logits_per_char_corr": -0.49909189343452454, "bits_per_byte_corr": 0.7200373996063888}, "model_output": [{"sum_logits": -1.1896510124206543, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.1896510124206543, "logits_per_char": -0.5948255062103271, "bits_per_byte": 0.8581518080045228, "num_chars": 2}, {"sum_logits": -0.9981837868690491, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -0.9981837868690491, "logits_per_char": -0.49909189343452454, "bits_per_byte": 0.7200373996063888, "num_chars": 2}, {"sum_logits": -1.53731107711792, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.53731107711792, "logits_per_char": -0.76865553855896, "bits_per_byte": 1.1089355336316435, "num_chars": 2}, {"sum_logits": -2.28010892868042, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -2.28010892868042, "logits_per_char": -1.14005446434021, "bits_per_byte": 1.6447509220481262, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 98, "native_id": "Mercury_7216248", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2942988872528076, "logits_per_token_corr": -1.2942988872528076, "logits_per_char_corr": -0.6471494436264038, "bits_per_byte_corr": 0.9336392930345353}, "model_output": [{"sum_logits": -1.5568759441375732, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.5568759441375732, "logits_per_char": -0.7784379720687866, "bits_per_byte": 1.1230486019441066, "num_chars": 2}, {"sum_logits": -1.2942988872528076, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.2942988872528076, "logits_per_char": -0.6471494436264038, "bits_per_byte": 0.9336392930345353, "num_chars": 2}, {"sum_logits": -1.2430732250213623, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.2430732250213623, "logits_per_char": -0.6215366125106812, "bits_per_byte": 0.8966877886007292, "num_chars": 2}, {"sum_logits": -1.5398986339569092, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.5398986339569092, "logits_per_char": -0.7699493169784546, "bits_per_byte": 1.110802061341459, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 99, "native_id": "Mercury_SC_417677", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.048597812652588, "logits_per_token_corr": -1.048597812652588, "logits_per_char_corr": -0.524298906326294, "bits_per_byte_corr": 0.7564034321009949}, "model_output": [{"sum_logits": -1.2607941627502441, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.2607941627502441, "logits_per_char": -0.6303970813751221, "bits_per_byte": 0.9094707430914183, "num_chars": 2}, {"sum_logits": -1.048597812652588, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.048597812652588, "logits_per_char": -0.524298906326294, "bits_per_byte": 0.7564034321009949, "num_chars": 2}, {"sum_logits": -1.4354443550109863, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4354443550109863, "logits_per_char": -0.7177221775054932, "bits_per_byte": 1.0354542262239472, "num_chars": 2}, {"sum_logits": -2.1337456703186035, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -2.1337456703186035, "logits_per_char": -1.0668728351593018, "bits_per_byte": 1.539172148544579, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 100, "native_id": "Mercury_7221655", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4268149137496948, "logits_per_token_corr": -1.4268149137496948, "logits_per_char_corr": -0.7134074568748474, "bits_per_byte_corr": 1.0292294001672888}, "model_output": [{"sum_logits": -1.2946871519088745, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.2946871519088745, "logits_per_char": -0.6473435759544373, "bits_per_byte": 0.9339193667814656, "num_chars": 2}, {"sum_logits": -1.2091349363327026, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -1.2091349363327026, "logits_per_char": -0.6045674681663513, "bits_per_byte": 0.872206488207018, "num_chars": 2}, {"sum_logits": -1.4268149137496948, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4268149137496948, "logits_per_char": -0.7134074568748474, "bits_per_byte": 1.0292294001672888, "num_chars": 2}, {"sum_logits": -1.7797354459762573, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.7797354459762573, "logits_per_char": -0.8898677229881287, "bits_per_byte": 1.2838077510030494, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 101, "native_id": "MCAS_2006_9_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.1415209770202637, "logits_per_token_corr": -2.1415209770202637, "logits_per_char_corr": -1.0707604885101318, "bits_per_byte_corr": 1.544780846754521}, "model_output": [{"sum_logits": -1.3103747367858887, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3103747367858887, "logits_per_char": -0.6551873683929443, "bits_per_byte": 0.9452355672342703, "num_chars": 2}, {"sum_logits": -1.0400919914245605, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.0400919914245605, "logits_per_char": -0.5200459957122803, "bits_per_byte": 0.7502677790488089, "num_chars": 2}, {"sum_logits": -1.397233486175537, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.397233486175537, "logits_per_char": -0.6986167430877686, "bits_per_byte": 1.0078909107354468, "num_chars": 2}, {"sum_logits": -2.1415209770202637, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -2.1415209770202637, "logits_per_char": -1.0707604885101318, "bits_per_byte": 1.544780846754521, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 102, "native_id": "MCAS_2004_9_2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3897465467453003, "logits_per_token_corr": -1.3897465467453003, "logits_per_char_corr": -0.6948732733726501, "bits_per_byte_corr": 1.0024902255417236}, "model_output": [{"sum_logits": -1.3897465467453003, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3897465467453003, "logits_per_char": -0.6948732733726501, "bits_per_byte": 1.0024902255417236, "num_chars": 2}, {"sum_logits": -1.2914341688156128, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.2914341688156128, "logits_per_char": -0.6457170844078064, "bits_per_byte": 0.9315728354930918, "num_chars": 2}, {"sum_logits": -1.3175820112228394, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3175820112228394, "logits_per_char": -0.6587910056114197, "bits_per_byte": 0.9504345167785313, "num_chars": 2}, {"sum_logits": -1.641459345817566, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.641459345817566, "logits_per_char": -0.820729672908783, "bits_per_byte": 1.1840626290167728, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 103, "native_id": "Mercury_180005", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3149691820144653, "logits_per_token_corr": -1.3149691820144653, "logits_per_char_corr": -0.6574845910072327, "bits_per_byte_corr": 0.9485497589077244}, "model_output": [{"sum_logits": -1.4410125017166138, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4410125017166138, "logits_per_char": -0.7205062508583069, "bits_per_byte": 1.0394707950435256, "num_chars": 2}, {"sum_logits": -1.3149691820144653, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3149691820144653, "logits_per_char": -0.6574845910072327, "bits_per_byte": 0.9485497589077244, "num_chars": 2}, {"sum_logits": -1.1348716020584106, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.1348716020584106, "logits_per_char": -0.5674358010292053, "bits_per_byte": 0.8186368161682792, "num_chars": 2}, {"sum_logits": -1.8948661088943481, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.8948661088943481, "logits_per_char": -0.9474330544471741, "bits_per_byte": 1.3668569692262034, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 104, "native_id": "Mercury_7071523", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.70139479637146, "logits_per_token_corr": -1.70139479637146, "logits_per_char_corr": -0.85069739818573, "bits_per_byte_corr": 1.2272969176605781}, "model_output": [{"sum_logits": -1.4559876918792725, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4559876918792725, "logits_per_char": -0.7279938459396362, "bits_per_byte": 1.0502731113355517, "num_chars": 2}, {"sum_logits": -0.9894444942474365, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -0.9894444942474365, "logits_per_char": -0.49472224712371826, "bits_per_byte": 0.7137333325433454, "num_chars": 2}, {"sum_logits": -1.70139479637146, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.70139479637146, "logits_per_char": -0.85069739818573, "bits_per_byte": 1.2272969176605781, "num_chars": 2}, {"sum_logits": -1.6264665126800537, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.6264665126800537, "logits_per_char": -0.8132332563400269, "bits_per_byte": 1.1732475860085827, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 105, "native_id": "Mercury_7263375", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0517884492874146, "logits_per_token_corr": -1.0517884492874146, "logits_per_char_corr": -0.5258942246437073, "bits_per_byte_corr": 0.7587049899261681}, "model_output": [{"sum_logits": -1.4496899843215942, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.4496899843215942, "logits_per_char": -0.7248449921607971, "bits_per_byte": 1.045730275604333, "num_chars": 2}, {"sum_logits": -1.0517884492874146, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.0517884492874146, "logits_per_char": -0.5258942246437073, "bits_per_byte": 0.7587049899261681, "num_chars": 2}, {"sum_logits": -1.3158646821975708, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.3158646821975708, "logits_per_char": -0.6579323410987854, "bits_per_byte": 0.9491957257443655, "num_chars": 2}, {"sum_logits": -1.968807339668274, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.968807339668274, "logits_per_char": -0.984403669834137, "bits_per_byte": 1.4201942927036262, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 106, "native_id": "TIMSS_2011_8_pg102", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4188345670700073, "logits_per_token_corr": -1.4188345670700073, "logits_per_char_corr": -0.7094172835350037, "bits_per_byte_corr": 1.0234727968776047}, "model_output": [{"sum_logits": -1.5417708158493042, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.5417708158493042, "logits_per_char": -0.7708854079246521, "bits_per_byte": 1.11215255510736, "num_chars": 2}, {"sum_logits": -1.123468279838562, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.123468279838562, "logits_per_char": -0.561734139919281, "bits_per_byte": 0.810411057960156, "num_chars": 2}, {"sum_logits": -1.5875128507614136, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.5875128507614136, "logits_per_char": -0.7937564253807068, "bits_per_byte": 1.145148458571319, "num_chars": 2}, {"sum_logits": -1.4188345670700073, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4188345670700073, "logits_per_char": -0.7094172835350037, "bits_per_byte": 1.0234727968776047, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 107, "native_id": "Mercury_406550", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0359447002410889, "logits_per_token_corr": -1.0359447002410889, "logits_per_char_corr": -0.5179723501205444, "bits_per_byte_corr": 0.7472761408370482}, "model_output": [{"sum_logits": -1.460768461227417, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.460768461227417, "logits_per_char": -0.7303842306137085, "bits_per_byte": 1.0537217074506553, "num_chars": 2}, {"sum_logits": -1.0359447002410889, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.0359447002410889, "logits_per_char": -0.5179723501205444, "bits_per_byte": 0.7472761408370482, "num_chars": 2}, {"sum_logits": -1.2790992259979248, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.2790992259979248, "logits_per_char": -0.6395496129989624, "bits_per_byte": 0.9226750550767217, "num_chars": 2}, {"sum_logits": -2.0733840465545654, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -2.0733840465545654, "logits_per_char": -1.0366920232772827, "bits_per_byte": 1.4956304409123562, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 108, "native_id": "Mercury_SC_400057", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8894503116607666, "logits_per_token_corr": -1.8894503116607666, "logits_per_char_corr": -0.9447251558303833, "bits_per_byte_corr": 1.3629502973205263}, "model_output": [{"sum_logits": -1.5669825077056885, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.5669825077056885, "logits_per_char": -0.7834912538528442, "bits_per_byte": 1.1303389465141864, "num_chars": 2}, {"sum_logits": -1.0635359287261963, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.0635359287261963, "logits_per_char": -0.5317679643630981, "bits_per_byte": 0.7671790050908118, "num_chars": 2}, {"sum_logits": -1.2660620212554932, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.2660620212554932, "logits_per_char": -0.6330310106277466, "bits_per_byte": 0.9132706997622348, "num_chars": 2}, {"sum_logits": -1.8894503116607666, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.8894503116607666, "logits_per_char": -0.9447251558303833, "bits_per_byte": 1.3629502973205263, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 109, "native_id": "TAKS_2009_5_26", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4387574195861816, "logits_per_token_corr": -1.4387574195861816, "logits_per_char_corr": -0.7193787097930908, "bits_per_byte_corr": 1.0378440971403384}, "model_output": [{"sum_logits": -1.4387574195861816, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4387574195861816, "logits_per_char": -0.7193787097930908, "bits_per_byte": 1.0378440971403384, "num_chars": 2}, {"sum_logits": -1.2344861030578613, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2344861030578613, "logits_per_char": -0.6172430515289307, "bits_per_byte": 0.8904934894645989, "num_chars": 2}, {"sum_logits": -1.370394229888916, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.370394229888916, "logits_per_char": -0.685197114944458, "bits_per_byte": 0.9885304797625049, "num_chars": 2}, {"sum_logits": -1.656313419342041, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.656313419342041, "logits_per_char": -0.8281567096710205, "bits_per_byte": 1.1947775781221608, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 110, "native_id": "LEAP_2007_8_10417", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3995661735534668, "logits_per_token_corr": -1.3995661735534668, "logits_per_char_corr": -0.6997830867767334, "bits_per_byte_corr": 1.0095735889914896}, "model_output": [{"sum_logits": -1.3995661735534668, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.3995661735534668, "logits_per_char": -0.6997830867767334, "bits_per_byte": 1.0095735889914896, "num_chars": 2}, {"sum_logits": -0.9507937431335449, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": true, "logits_per_token": -0.9507937431335449, "logits_per_char": -0.47539687156677246, "bits_per_byte": 0.6858527090640029, "num_chars": 2}, {"sum_logits": -1.4256129264831543, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.4256129264831543, "logits_per_char": -0.7128064632415771, "bits_per_byte": 1.0283623496329632, "num_chars": 2}, {"sum_logits": -2.163501262664795, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -2.163501262664795, "logits_per_char": -1.0817506313323975, "bits_per_byte": 1.5606362713028763, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 111, "native_id": "Mercury_7027405", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3208489418029785, "logits_per_token_corr": -1.3208489418029785, "logits_per_char_corr": -0.6604244709014893, "bits_per_byte_corr": 0.9527911090519806}, "model_output": [{"sum_logits": -1.3208489418029785, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3208489418029785, "logits_per_char": -0.6604244709014893, "bits_per_byte": 0.9527911090519806, "num_chars": 2}, {"sum_logits": -1.1298985481262207, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.1298985481262207, "logits_per_char": -0.5649492740631104, "bits_per_byte": 0.8150495160452548, "num_chars": 2}, {"sum_logits": -1.5580811500549316, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.5580811500549316, "logits_per_char": -0.7790405750274658, "bits_per_byte": 1.123917974244219, "num_chars": 2}, {"sum_logits": -1.6896729469299316, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.6896729469299316, "logits_per_char": -0.8448364734649658, "bits_per_byte": 1.2188413906309021, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 112, "native_id": "Mercury_7058415", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4353737831115723, "logits_per_token_corr": -1.4353737831115723, "logits_per_char_corr": -0.7176868915557861, "bits_per_byte_corr": 1.0354033193592918}, "model_output": [{"sum_logits": -1.4353737831115723, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4353737831115723, "logits_per_char": -0.7176868915557861, "bits_per_byte": 1.0354033193592918, "num_chars": 2}, {"sum_logits": -1.0798726081848145, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.0798726081848145, "logits_per_char": -0.5399363040924072, "bits_per_byte": 0.7789634283105908, "num_chars": 2}, {"sum_logits": -1.267549991607666, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.267549991607666, "logits_per_char": -0.633774995803833, "bits_per_byte": 0.9143440434862704, "num_chars": 2}, {"sum_logits": -2.0354065895080566, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -2.0354065895080566, "logits_per_char": -1.0177032947540283, "bits_per_byte": 1.4682354964390505, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 113, "native_id": "Mercury_7215828", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1862534284591675, "logits_per_token_corr": -1.1862534284591675, "logits_per_char_corr": -0.5931267142295837, "bits_per_byte_corr": 0.8557009692384006}, "model_output": [{"sum_logits": -1.1862534284591675, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.1862534284591675, "logits_per_char": -0.5931267142295837, "bits_per_byte": 0.8557009692384006, "num_chars": 2}, {"sum_logits": -1.2804592847824097, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.2804592847824097, "logits_per_char": -0.6402296423912048, "bits_per_byte": 0.9236561301085693, "num_chars": 2}, {"sum_logits": -1.47105872631073, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.47105872631073, "logits_per_char": -0.735529363155365, "bits_per_byte": 1.061144564653225, "num_chars": 2}, {"sum_logits": -1.754301905632019, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.754301905632019, "logits_per_char": -0.8771509528160095, "bits_per_byte": 1.2654613297395951, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 114, "native_id": "Mercury_7064575", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.338961124420166, "logits_per_token_corr": -1.338961124420166, "logits_per_char_corr": -0.669480562210083, "bits_per_byte_corr": 0.9658562870727359}, "model_output": [{"sum_logits": -1.3848748207092285, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3848748207092285, "logits_per_char": -0.6924374103546143, "bits_per_byte": 0.9989760180453159, "num_chars": 2}, {"sum_logits": -1.338961124420166, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.338961124420166, "logits_per_char": -0.669480562210083, "bits_per_byte": 0.9658562870727359, "num_chars": 2}, {"sum_logits": -1.3858132362365723, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3858132362365723, "logits_per_char": -0.6929066181182861, "bits_per_byte": 0.9996529417591123, "num_chars": 2}, {"sum_logits": -1.584972858428955, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.584972858428955, "logits_per_char": -0.7924864292144775, "bits_per_byte": 1.1433162414003506, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 115, "native_id": "Mercury_7097493", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9430780410766602, "logits_per_token_corr": -1.9430780410766602, "logits_per_char_corr": -0.9715390205383301, "bits_per_byte_corr": 1.4016345269617765}, "model_output": [{"sum_logits": -1.3256559371948242, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3256559371948242, "logits_per_char": -0.6628279685974121, "bits_per_byte": 0.956258623258679, "num_chars": 2}, {"sum_logits": -1.0244169235229492, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.0244169235229492, "logits_per_char": -0.5122084617614746, "bits_per_byte": 0.7389606076851746, "num_chars": 2}, {"sum_logits": -1.5285139083862305, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.5285139083862305, "logits_per_char": -0.7642569541931152, "bits_per_byte": 1.1025897177801032, "num_chars": 2}, {"sum_logits": -1.9430780410766602, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.9430780410766602, "logits_per_char": -0.9715390205383301, "bits_per_byte": 1.4016345269617765, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 116, "native_id": "AKDE&ED_2008_8_47", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3093116283416748, "logits_per_token_corr": -1.3093116283416748, "logits_per_char_corr": -0.6546558141708374, "bits_per_byte_corr": 0.9444686965940724}, "model_output": [{"sum_logits": -1.4900729656219482, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4900729656219482, "logits_per_char": -0.7450364828109741, "bits_per_byte": 1.07486043903352, "num_chars": 2}, {"sum_logits": -1.3093116283416748, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3093116283416748, "logits_per_char": -0.6546558141708374, "bits_per_byte": 0.9444686965940724, "num_chars": 2}, {"sum_logits": -1.2925364971160889, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.2925364971160889, "logits_per_char": -0.6462682485580444, "bits_per_byte": 0.9323679972793565, "num_chars": 2}, {"sum_logits": -1.5072181224822998, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.5072181224822998, "logits_per_char": -0.7536090612411499, "bits_per_byte": 1.0872280554223752, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 117, "native_id": "Mercury_405136", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3945480585098267, "logits_per_token_corr": -1.3945480585098267, "logits_per_char_corr": -0.6972740292549133, "bits_per_byte_corr": 1.0059537841474522}, "model_output": [{"sum_logits": -1.2956196069717407, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.2956196069717407, "logits_per_char": -0.6478098034858704, "bits_per_byte": 0.9345919909289906, "num_chars": 2}, {"sum_logits": -1.091882348060608, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.091882348060608, "logits_per_char": -0.545941174030304, "bits_per_byte": 0.7876266243911839, "num_chars": 2}, {"sum_logits": -1.3945480585098267, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3945480585098267, "logits_per_char": -0.6972740292549133, "bits_per_byte": 1.0059537841474522, "num_chars": 2}, {"sum_logits": -2.0152945518493652, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -2.0152945518493652, "logits_per_char": -1.0076472759246826, "bits_per_byte": 1.453727727942857, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 118, "native_id": "Mercury_415086", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.341881513595581, "logits_per_token_corr": -1.341881513595581, "logits_per_char_corr": -0.6709407567977905, "bits_per_byte_corr": 0.9679629025631559}, "model_output": [{"sum_logits": -1.6588962078094482, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.6588962078094482, "logits_per_char": -0.8294481039047241, "bits_per_byte": 1.1966406661789588, "num_chars": 2}, {"sum_logits": -1.3933303356170654, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3933303356170654, "logits_per_char": -0.6966651678085327, "bits_per_byte": 1.0050753827581698, "num_chars": 2}, {"sum_logits": -1.2782008647918701, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.2782008647918701, "logits_per_char": -0.6391004323959351, "bits_per_byte": 0.9220270244482701, "num_chars": 2}, {"sum_logits": -1.341881513595581, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.341881513595581, "logits_per_char": -0.6709407567977905, "bits_per_byte": 0.9679629025631559, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 119, "native_id": "Mercury_7228725", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4008862972259521, "logits_per_token_corr": -1.4008862972259521, "logits_per_char_corr": -0.7004431486129761, "bits_per_byte_corr": 1.0105258569293178}, "model_output": [{"sum_logits": -1.3774945735931396, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3774945735931396, "logits_per_char": -0.6887472867965698, "bits_per_byte": 0.9936522950878538, "num_chars": 2}, {"sum_logits": -1.0662095546722412, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.0662095546722412, "logits_per_char": -0.5331047773361206, "bits_per_byte": 0.7691076185375888, "num_chars": 2}, {"sum_logits": -1.4008862972259521, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4008862972259521, "logits_per_char": -0.7004431486129761, "bits_per_byte": 1.0105258569293178, "num_chars": 2}, {"sum_logits": -1.9125516414642334, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.9125516414642334, "logits_per_char": -0.9562758207321167, "bits_per_byte": 1.3796143842932393, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 120, "native_id": "Mercury_7201740", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.264756202697754, "logits_per_token_corr": -1.264756202697754, "logits_per_char_corr": -0.632378101348877, "bits_per_byte_corr": 0.9123287507834585}, "model_output": [{"sum_logits": -1.264756202697754, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.264756202697754, "logits_per_char": -0.632378101348877, "bits_per_byte": 0.9123287507834585, "num_chars": 2}, {"sum_logits": -1.1156320571899414, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": true, "logits_per_token": -1.1156320571899414, "logits_per_char": -0.5578160285949707, "bits_per_byte": 0.8047584181829186, "num_chars": 2}, {"sum_logits": -1.408238410949707, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.408238410949707, "logits_per_char": -0.7041192054748535, "bits_per_byte": 1.015829285933978, "num_chars": 2}, {"sum_logits": -2.0411157608032227, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -2.0411157608032227, "logits_per_char": -1.0205578804016113, "bits_per_byte": 1.4723537929966142, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 121, "native_id": "NYSEDREGENTS_2010_4_4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.044019103050232, "logits_per_token_corr": -1.044019103050232, "logits_per_char_corr": -0.522009551525116, "bits_per_byte_corr": 0.7531005912824977}, "model_output": [{"sum_logits": -1.044019103050232, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.044019103050232, "logits_per_char": -0.522009551525116, "bits_per_byte": 0.7531005912824977, "num_chars": 2}, {"sum_logits": -1.2457078695297241, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.2457078695297241, "logits_per_char": -0.6228539347648621, "bits_per_byte": 0.89858828288409, "num_chars": 2}, {"sum_logits": -1.4623178243637085, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4623178243637085, "logits_per_char": -0.7311589121818542, "bits_per_byte": 1.0548393367072881, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 122, "native_id": "MEAP_2005_8_21", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7197540998458862, "logits_per_token_corr": -1.7197540998458862, "logits_per_char_corr": -0.8598770499229431, "bits_per_byte_corr": 1.240540355698953}, "model_output": [{"sum_logits": -1.3889073133468628, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3889073133468628, "logits_per_char": -0.6944536566734314, "bits_per_byte": 1.001884846610686, "num_chars": 2}, {"sum_logits": -1.172709345817566, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.172709345817566, "logits_per_char": -0.586354672908783, "bits_per_byte": 0.8459309788081791, "num_chars": 2}, {"sum_logits": -1.391243815422058, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.391243815422058, "logits_per_char": -0.695621907711029, "bits_per_byte": 1.0035702765891428, "num_chars": 2}, {"sum_logits": -1.7197540998458862, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.7197540998458862, "logits_per_char": -0.8598770499229431, "bits_per_byte": 1.240540355698953, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 123, "native_id": "Mercury_7026355", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.386718511581421, "logits_per_token_corr": -1.386718511581421, "logits_per_char_corr": -0.6933592557907104, "bits_per_byte_corr": 1.0003059598844388}, "model_output": [{"sum_logits": -1.386718511581421, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.386718511581421, "logits_per_char": -0.6933592557907104, "bits_per_byte": 1.0003059598844388, "num_chars": 2}, {"sum_logits": -1.0213239192962646, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.0213239192962646, "logits_per_char": -0.5106619596481323, "bits_per_byte": 0.7367294767555297, "num_chars": 2}, {"sum_logits": -1.398669958114624, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.398669958114624, "logits_per_char": -0.699334979057312, "bits_per_byte": 1.008927106206896, "num_chars": 2}, {"sum_logits": -2.0593106746673584, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -2.0593106746673584, "logits_per_char": -1.0296553373336792, "bits_per_byte": 1.485478648997219, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 124, "native_id": "Mercury_7249708", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.8874152302742004, "logits_per_token_corr": -0.8874152302742004, "logits_per_char_corr": -0.4437076151371002, "bits_per_byte_corr": 0.6401347759634232}, "model_output": [{"sum_logits": -1.389892816543579, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.389892816543579, "logits_per_char": -0.6949464082717896, "bits_per_byte": 1.002595736898028, "num_chars": 2}, {"sum_logits": -0.8874152302742004, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -0.8874152302742004, "logits_per_char": -0.4437076151371002, "bits_per_byte": 0.6401347759634232, "num_chars": 2}, {"sum_logits": -1.5860731601715088, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.5860731601715088, "logits_per_char": -0.7930365800857544, "bits_per_byte": 1.1441099413340832, "num_chars": 2}, {"sum_logits": -2.0670416355133057, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -2.0670416355133057, "logits_per_char": -1.0335208177566528, "bits_per_byte": 1.4910553584341004, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 125, "native_id": "Mercury_7107170", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6493637561798096, "logits_per_token_corr": -1.6493637561798096, "logits_per_char_corr": -0.8246818780899048, "bits_per_byte_corr": 1.1897644558321572}, "model_output": [{"sum_logits": -1.3545639514923096, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3545639514923096, "logits_per_char": -0.6772819757461548, "bits_per_byte": 0.9771113476931587, "num_chars": 2}, {"sum_logits": -1.293900728225708, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.293900728225708, "logits_per_char": -0.646950364112854, "bits_per_byte": 0.9333520820075942, "num_chars": 2}, {"sum_logits": -1.3465321063995361, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3465321063995361, "logits_per_char": -0.6732660531997681, "bits_per_byte": 0.9713175961508882, "num_chars": 2}, {"sum_logits": -1.6493637561798096, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.6493637561798096, "logits_per_char": -0.8246818780899048, "bits_per_byte": 1.1897644558321572, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 126, "native_id": "Mercury_183820", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3373968601226807, "logits_per_token_corr": -1.3373968601226807, "logits_per_char_corr": -0.6686984300613403, "bits_per_byte_corr": 0.9647279089004241}, "model_output": [{"sum_logits": -1.3373968601226807, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3373968601226807, "logits_per_char": -0.6686984300613403, "bits_per_byte": 0.9647279089004241, "num_chars": 2}, {"sum_logits": -1.4833853244781494, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4833853244781494, "logits_per_char": -0.7416926622390747, "bits_per_byte": 1.0700363256768148, "num_chars": 2}, {"sum_logits": -1.332063913345337, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.332063913345337, "logits_per_char": -0.6660319566726685, "bits_per_byte": 0.9608810009659221, "num_chars": 2}, {"sum_logits": -1.5659544467926025, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.5659544467926025, "logits_per_char": -0.7829772233963013, "bits_per_byte": 1.1295973573236655, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 127, "native_id": "Mercury_SC_401357", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3536560535430908, "logits_per_token_corr": -1.3536560535430908, "logits_per_char_corr": -0.6768280267715454, "bits_per_byte_corr": 0.9764564377586726}, "model_output": [{"sum_logits": -1.615525484085083, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.615525484085083, "logits_per_char": -0.8077627420425415, "bits_per_byte": 1.1653553021604828, "num_chars": 2}, {"sum_logits": -1.3536560535430908, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3536560535430908, "logits_per_char": -0.6768280267715454, "bits_per_byte": 0.9764564377586726, "num_chars": 2}, {"sum_logits": -1.3173954486846924, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.3173954486846924, "logits_per_char": -0.6586977243423462, "bits_per_byte": 0.950299940354231, "num_chars": 2}, {"sum_logits": -1.349564790725708, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.349564790725708, "logits_per_char": -0.674782395362854, "bits_per_byte": 0.9735052154698647, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 128, "native_id": "NYSEDREGENTS_2008_8_11", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7415767908096313, "logits_per_token_corr": -1.7415767908096313, "logits_per_char_corr": -0.8707883954048157, "bits_per_byte_corr": 1.2562820997150879}, "model_output": [{"sum_logits": -1.3432918787002563, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3432918787002563, "logits_per_char": -0.6716459393501282, "bits_per_byte": 0.9689802659343355, "num_chars": 2}, {"sum_logits": -1.256015419960022, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.256015419960022, "logits_per_char": -0.628007709980011, "bits_per_byte": 0.9060236088288471, "num_chars": 2}, {"sum_logits": -1.328179955482483, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.328179955482483, "logits_per_char": -0.6640899777412415, "bits_per_byte": 0.9580793175920395, "num_chars": 2}, {"sum_logits": -1.7415767908096313, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.7415767908096313, "logits_per_char": -0.8707883954048157, "bits_per_byte": 1.2562820997150879, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 129, "native_id": "Mercury_416650", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3251988887786865, "logits_per_token_corr": -1.3251988887786865, "logits_per_char_corr": -0.6625994443893433, "bits_per_byte_corr": 0.9559289325169747}, "model_output": [{"sum_logits": -1.4839112758636475, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4839112758636475, "logits_per_char": -0.7419556379318237, "bits_per_byte": 1.0704157194046184, "num_chars": 2}, {"sum_logits": -1.3691232204437256, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3691232204437256, "logits_per_char": -0.6845616102218628, "bits_per_byte": 0.9876136402507545, "num_chars": 2}, {"sum_logits": -1.3251988887786865, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.3251988887786865, "logits_per_char": -0.6625994443893433, "bits_per_byte": 0.9559289325169747, "num_chars": 2}, {"sum_logits": -1.449704885482788, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.449704885482788, "logits_per_char": -0.724852442741394, "bits_per_byte": 1.0457410245200118, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 130, "native_id": "NCEOGA_2013_5_20", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2886850833892822, "logits_per_token_corr": -1.2886850833892822, "logits_per_char_corr": -0.6443425416946411, "bits_per_byte_corr": 0.9295897895373167}, "model_output": [{"sum_logits": -1.4583227634429932, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4583227634429932, "logits_per_char": -0.7291613817214966, "bits_per_byte": 1.0519575094181033, "num_chars": 2}, {"sum_logits": -1.2886850833892822, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.2886850833892822, "logits_per_char": -0.6443425416946411, "bits_per_byte": 0.9295897895373167, "num_chars": 2}, {"sum_logits": -1.2336828708648682, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.2336828708648682, "logits_per_char": -0.6168414354324341, "bits_per_byte": 0.8899140799138417, "num_chars": 2}, {"sum_logits": -1.705232858657837, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.705232858657837, "logits_per_char": -0.8526164293289185, "bits_per_byte": 1.2300654943741698, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 131, "native_id": "Mercury_400500", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5849449634552002, "logits_per_token_corr": -1.5849449634552002, "logits_per_char_corr": -0.7924724817276001, "bits_per_byte_corr": 1.1432961194301998}, "model_output": [{"sum_logits": -1.477381944656372, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.477381944656372, "logits_per_char": -0.738690972328186, "bits_per_byte": 1.065705802528086, "num_chars": 2}, {"sum_logits": -1.0974571704864502, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.0974571704864502, "logits_per_char": -0.5487285852432251, "bits_per_byte": 0.7916480087249864, "num_chars": 2}, {"sum_logits": -1.5849449634552002, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.5849449634552002, "logits_per_char": -0.7924724817276001, "bits_per_byte": 1.1432961194301998, "num_chars": 2}, {"sum_logits": -1.5631554126739502, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.5631554126739502, "logits_per_char": -0.7815777063369751, "bits_per_byte": 1.1275782810025345, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 132, "native_id": "Mercury_SC_401366", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.852303147315979, "logits_per_token_corr": -1.852303147315979, "logits_per_char_corr": -0.9261515736579895, "bits_per_byte_corr": 1.3361542824288508}, "model_output": [{"sum_logits": -1.379372239112854, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.379372239112854, "logits_per_char": -0.689686119556427, "bits_per_byte": 0.9950067444547248, "num_chars": 2}, {"sum_logits": -1.1199957132339478, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.1199957132339478, "logits_per_char": -0.5599978566169739, "bits_per_byte": 0.8079061306503375, "num_chars": 2}, {"sum_logits": -1.37972891330719, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.37972891330719, "logits_per_char": -0.689864456653595, "bits_per_byte": 0.9952640305004158, "num_chars": 2}, {"sum_logits": -1.852303147315979, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.852303147315979, "logits_per_char": -0.9261515736579895, "bits_per_byte": 1.3361542824288508, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 133, "native_id": "Mercury_7141610", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2693620920181274, "logits_per_token_corr": -1.2693620920181274, "logits_per_char_corr": -0.6346810460090637, "bits_per_byte_corr": 0.915651197624154}, "model_output": [{"sum_logits": -1.4379774332046509, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4379774332046509, "logits_per_char": -0.7189887166023254, "bits_per_byte": 1.0372814558980405, "num_chars": 2}, {"sum_logits": -1.2693620920181274, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.2693620920181274, "logits_per_char": -0.6346810460090637, "bits_per_byte": 0.915651197624154, "num_chars": 2}, {"sum_logits": -1.2774149179458618, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.2774149179458618, "logits_per_char": -0.6387074589729309, "bits_per_byte": 0.9214600836397004, "num_chars": 2}, {"sum_logits": -1.6833559274673462, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.6833559274673462, "logits_per_char": -0.8416779637336731, "bits_per_byte": 1.2142846243049634, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 134, "native_id": "Mercury_7247013", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1106048822402954, "logits_per_token_corr": -1.1106048822402954, "logits_per_char_corr": -0.5553024411201477, "bits_per_byte_corr": 0.8011320779981482}, "model_output": [{"sum_logits": -1.4498001337051392, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4498001337051392, "logits_per_char": -0.7249000668525696, "bits_per_byte": 1.0458097315890316, "num_chars": 2}, {"sum_logits": -1.1106048822402954, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.1106048822402954, "logits_per_char": -0.5553024411201477, "bits_per_byte": 0.8011320779981482, "num_chars": 2}, {"sum_logits": -1.2907577753067017, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.2907577753067017, "logits_per_char": -0.6453788876533508, "bits_per_byte": 0.9310849207125936, "num_chars": 2}, {"sum_logits": -1.8892933130264282, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.8892933130264282, "logits_per_char": -0.9446466565132141, "bits_per_byte": 1.3628370467449331, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 135, "native_id": "NYSEDREGENTS_2008_8_30", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2000327110290527, "logits_per_token_corr": -1.2000327110290527, "logits_per_char_corr": -0.6000163555145264, "bits_per_byte_corr": 0.8656406205536983}, "model_output": [{"sum_logits": -1.2806429862976074, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.2806429862976074, "logits_per_char": -0.6403214931488037, "bits_per_byte": 0.9237886427410592, "num_chars": 2}, {"sum_logits": -1.2000327110290527, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.2000327110290527, "logits_per_char": -0.6000163555145264, "bits_per_byte": 0.8656406205536983, "num_chars": 2}, {"sum_logits": -1.5687956809997559, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.5687956809997559, "logits_per_char": -0.7843978404998779, "bits_per_byte": 1.131646874573999, "num_chars": 2}, {"sum_logits": -1.657548427581787, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.657548427581787, "logits_per_char": -0.8287742137908936, "bits_per_byte": 1.1956684482536306, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 136, "native_id": "ACTAAP_2011_5_16", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1309670209884644, "logits_per_token_corr": -1.1309670209884644, "logits_per_char_corr": -0.5654835104942322, "bits_per_byte_corr": 0.815820256295097}, "model_output": [{"sum_logits": -1.3601559400558472, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.3601559400558472, "logits_per_char": -0.6800779700279236, "bits_per_byte": 0.9811451147778233, "num_chars": 2}, {"sum_logits": -1.1309670209884644, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.1309670209884644, "logits_per_char": -0.5654835104942322, "bits_per_byte": 0.815820256295097, "num_chars": 2}, {"sum_logits": -1.5419243574142456, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.5419243574142456, "logits_per_char": -0.7709621787071228, "bits_per_byte": 1.112263311934516, "num_chars": 2}, {"sum_logits": -1.6557244062423706, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.6557244062423706, "logits_per_char": -0.8278622031211853, "bits_per_byte": 1.1943526949832037, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 137, "native_id": "Mercury_7093153", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1313564777374268, "logits_per_token_corr": -1.1313564777374268, "logits_per_char_corr": -0.5656782388687134, "bits_per_byte_corr": 0.8161011899552816}, "model_output": [{"sum_logits": -1.112128496170044, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.112128496170044, "logits_per_char": -0.556064248085022, "bits_per_byte": 0.8022311331284878, "num_chars": 2}, {"sum_logits": -1.1313564777374268, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.1313564777374268, "logits_per_char": -0.5656782388687134, "bits_per_byte": 0.8161011899552816, "num_chars": 2}, {"sum_logits": -1.4606144428253174, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4606144428253174, "logits_per_char": -0.7303072214126587, "bits_per_byte": 1.0536106066581978, "num_chars": 2}, {"sum_logits": -2.24822735786438, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -2.24822735786438, "logits_per_char": -1.12411367893219, "bits_per_byte": 1.621753229992084, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 138, "native_id": "Mercury_7013965", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5606298446655273, "logits_per_token_corr": -1.5606298446655273, "logits_per_char_corr": -0.7803149223327637, "bits_per_byte_corr": 1.1257564687819435}, "model_output": [{"sum_logits": -1.5606298446655273, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.5606298446655273, "logits_per_char": -0.7803149223327637, "bits_per_byte": 1.1257564687819435, "num_chars": 2}, {"sum_logits": -1.1581048965454102, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.1581048965454102, "logits_per_char": -0.5790524482727051, "bits_per_byte": 0.8353960955382448, "num_chars": 2}, {"sum_logits": -1.2340478897094727, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.2340478897094727, "logits_per_char": -0.6170239448547363, "bits_per_byte": 0.8901773853523128, "num_chars": 2}, {"sum_logits": -1.748488426208496, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.748488426208496, "logits_per_char": -0.874244213104248, "bits_per_byte": 1.261267790772279, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 139, "native_id": "Mercury_7034843", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3389559984207153, "logits_per_token_corr": -1.3389559984207153, "logits_per_char_corr": -0.6694779992103577, "bits_per_byte_corr": 0.9658525894457423}, "model_output": [{"sum_logits": -1.1563273668289185, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.1563273668289185, "logits_per_char": -0.5781636834144592, "bits_per_byte": 0.8341138788847362, "num_chars": 2}, {"sum_logits": -1.3389559984207153, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3389559984207153, "logits_per_char": -0.6694779992103577, "bits_per_byte": 0.9658525894457423, "num_chars": 2}, {"sum_logits": -1.4759246110916138, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4759246110916138, "logits_per_char": -0.7379623055458069, "bits_per_byte": 1.0646545585746865, "num_chars": 2}, {"sum_logits": -1.7823058366775513, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.7823058366775513, "logits_per_char": -0.8911529183387756, "bits_per_byte": 1.2856618959620028, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 140, "native_id": "Mercury_SC_407610", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.1916556358337402, "logits_per_token_corr": -2.1916556358337402, "logits_per_char_corr": -1.0958278179168701, "bits_per_byte_corr": 1.5809453585779782}, "model_output": [{"sum_logits": -1.2453209161758423, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.2453209161758423, "logits_per_char": -0.6226604580879211, "bits_per_byte": 0.8983091550417395, "num_chars": 2}, {"sum_logits": -1.0029217004776, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.0029217004776, "logits_per_char": -0.5014608502388, "bits_per_byte": 0.7234550818399997, "num_chars": 2}, {"sum_logits": -1.485284447669983, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.485284447669983, "logits_per_char": -0.7426422238349915, "bits_per_byte": 1.0714062534822635, "num_chars": 2}, {"sum_logits": -2.1916556358337402, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -2.1916556358337402, "logits_per_char": -1.0958278179168701, "bits_per_byte": 1.5809453585779782, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 141, "native_id": "Mercury_405947", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.598825454711914, "logits_per_token_corr": -1.598825454711914, "logits_per_char_corr": -0.799412727355957, "bits_per_byte_corr": 1.1533087773807889}, "model_output": [{"sum_logits": -1.1555442810058594, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.1555442810058594, "logits_per_char": -0.5777721405029297, "bits_per_byte": 0.8335490018679769, "num_chars": 2}, {"sum_logits": -1.0193862915039062, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.0193862915039062, "logits_per_char": -0.5096931457519531, "bits_per_byte": 0.7353317737519667, "num_chars": 2}, {"sum_logits": -1.598825454711914, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.598825454711914, "logits_per_char": -0.799412727355957, "bits_per_byte": 1.1533087773807889, "num_chars": 2}, {"sum_logits": -2.158536911010742, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -2.158536911010742, "logits_per_char": -1.079268455505371, "bits_per_byte": 1.5570552485466085, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 142, "native_id": "AKDE&ED_2012_8_6", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6057958602905273, "logits_per_token_corr": -1.6057958602905273, "logits_per_char_corr": -0.8028979301452637, "bits_per_byte_corr": 1.1583368621614174}, "model_output": [{"sum_logits": -1.6057958602905273, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.6057958602905273, "logits_per_char": -0.8028979301452637, "bits_per_byte": 1.1583368621614174, "num_chars": 2}, {"sum_logits": -1.1333379745483398, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.1333379745483398, "logits_per_char": -0.5666689872741699, "bits_per_byte": 0.8175305377666034, "num_chars": 2}, {"sum_logits": -1.344639778137207, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.344639778137207, "logits_per_char": -0.6723198890686035, "bits_per_byte": 0.9699525698509891, "num_chars": 2}, {"sum_logits": -1.5998353958129883, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.5998353958129883, "logits_per_char": -0.7999176979064941, "bits_per_byte": 1.1540372958898442, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 143, "native_id": "Mercury_7011130", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4843776226043701, "logits_per_token_corr": -1.4843776226043701, "logits_per_char_corr": -0.7421888113021851, "bits_per_byte_corr": 1.0707521174697063}, "model_output": [{"sum_logits": -1.4843776226043701, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.4843776226043701, "logits_per_char": -0.7421888113021851, "bits_per_byte": 1.0707521174697063, "num_chars": 2}, {"sum_logits": -1.006890058517456, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": true, "logits_per_token": -1.006890058517456, "logits_per_char": -0.503445029258728, "bits_per_byte": 0.7263176470722879, "num_chars": 2}, {"sum_logits": -1.4739959239959717, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.4739959239959717, "logits_per_char": -0.7369979619979858, "bits_per_byte": 1.0632633049205307, "num_chars": 2}, {"sum_logits": -1.8432776927947998, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.8432776927947998, "logits_per_char": -0.9216388463973999, "bits_per_byte": 1.3296437931891092, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 144, "native_id": "Mercury_LBS11022", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4896612167358398, "logits_per_token_corr": -1.4896612167358398, "logits_per_char_corr": -0.7448306083679199, "bits_per_byte_corr": 1.0745634249954796}, "model_output": [{"sum_logits": -1.4896612167358398, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4896612167358398, "logits_per_char": -0.7448306083679199, "bits_per_byte": 1.0745634249954796, "num_chars": 2}, {"sum_logits": -1.1894559860229492, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.1894559860229492, "logits_per_char": -0.5947279930114746, "bits_per_byte": 0.858011126196117, "num_chars": 2}, {"sum_logits": -1.3365869522094727, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3365869522094727, "logits_per_char": -0.6682934761047363, "bits_per_byte": 0.9641436838354427, "num_chars": 2}, {"sum_logits": -1.6561555862426758, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.6561555862426758, "logits_per_char": -0.8280777931213379, "bits_per_byte": 1.1946637256072894, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 145, "native_id": "TIMSS_1995_8_J1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3963598012924194, "logits_per_token_corr": -1.3963598012924194, "logits_per_char_corr": -0.6981799006462097, "bits_per_byte_corr": 1.0072606803113595}, "model_output": [{"sum_logits": -1.2801965475082397, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.2801965475082397, "logits_per_char": -0.6400982737541199, "bits_per_byte": 0.9234666052273183, "num_chars": 2}, {"sum_logits": -1.1158689260482788, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": true, "logits_per_token": -1.1158689260482788, "logits_per_char": -0.5579344630241394, "bits_per_byte": 0.804929282946551, "num_chars": 2}, {"sum_logits": -1.3963598012924194, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.3963598012924194, "logits_per_char": -0.6981799006462097, "bits_per_byte": 1.0072606803113595, "num_chars": 2}, {"sum_logits": -1.9825242757797241, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.9825242757797241, "logits_per_char": -0.9912621378898621, "bits_per_byte": 1.4300889705557231, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 146, "native_id": "Mercury_SC_408366", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3820856809616089, "logits_per_token_corr": -1.3820856809616089, "logits_per_char_corr": -0.6910428404808044, "bits_per_byte_corr": 0.9969640790041958}, "model_output": [{"sum_logits": -1.3406866788864136, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3406866788864136, "logits_per_char": -0.6703433394432068, "bits_per_byte": 0.9671010115083563, "num_chars": 2}, {"sum_logits": -1.3820856809616089, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3820856809616089, "logits_per_char": -0.6910428404808044, "bits_per_byte": 0.9969640790041958, "num_chars": 2}, {"sum_logits": -1.2478903532028198, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.2478903532028198, "logits_per_char": -0.6239451766014099, "bits_per_byte": 0.9001626120700893, "num_chars": 2}, {"sum_logits": -1.7113779783248901, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.7113779783248901, "logits_per_char": -0.8556889891624451, "bits_per_byte": 1.2344982612088364, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 147, "native_id": "Mercury_7009993", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2707607746124268, "logits_per_token_corr": -1.2707607746124268, "logits_per_char_corr": -0.6353803873062134, "bits_per_byte_corr": 0.9166601338454414}, "model_output": [{"sum_logits": -1.2707607746124268, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.2707607746124268, "logits_per_char": -0.6353803873062134, "bits_per_byte": 0.9166601338454414, "num_chars": 2}, {"sum_logits": -1.1459991931915283, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.1459991931915283, "logits_per_char": -0.5729995965957642, "bits_per_byte": 0.8266636764406794, "num_chars": 2}, {"sum_logits": -1.4583885669708252, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4583885669708252, "logits_per_char": -0.7291942834854126, "bits_per_byte": 1.0520049766297415, "num_chars": 2}, {"sum_logits": -1.887274980545044, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.887274980545044, "logits_per_char": -0.943637490272522, "bits_per_byte": 1.361381127614053, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 148, "native_id": "Mercury_401699", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2111668586730957, "logits_per_token_corr": -1.2111668586730957, "logits_per_char_corr": -0.6055834293365479, "bits_per_byte_corr": 0.8736722103489972}, "model_output": [{"sum_logits": -1.6525864601135254, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.6525864601135254, "logits_per_char": -0.8262932300567627, "bits_per_byte": 1.1920891453238713, "num_chars": 2}, {"sum_logits": -1.5404458045959473, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.5404458045959473, "logits_per_char": -0.7702229022979736, "bits_per_byte": 1.1111967615251894, "num_chars": 2}, {"sum_logits": -1.2111668586730957, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.2111668586730957, "logits_per_char": -0.6055834293365479, "bits_per_byte": 0.8736722103489972, "num_chars": 2}, {"sum_logits": -1.5783047676086426, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.5783047676086426, "logits_per_char": -0.7891523838043213, "bits_per_byte": 1.138506230621016, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 149, "native_id": "Mercury_7056858", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6850440502166748, "logits_per_token_corr": -1.6850440502166748, "logits_per_char_corr": -0.8425220251083374, "bits_per_byte_corr": 1.2155023474643984}, "model_output": [{"sum_logits": -1.6850440502166748, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.6850440502166748, "logits_per_char": -0.8425220251083374, "bits_per_byte": 1.2155023474643984, "num_chars": 2}, {"sum_logits": -1.1819217205047607, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.1819217205047607, "logits_per_char": -0.5909608602523804, "bits_per_byte": 0.8525763024461974, "num_chars": 2}, {"sum_logits": -1.3592689037322998, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3592689037322998, "logits_per_char": -0.6796344518661499, "bits_per_byte": 0.9805052533252878, "num_chars": 2}, {"sum_logits": -1.4501121044158936, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4501121044158936, "logits_per_char": -0.7250560522079468, "bits_per_byte": 1.0460347708876856, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 150, "native_id": "Mercury_7027160", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.499267339706421, "logits_per_token_corr": -1.499267339706421, "logits_per_char_corr": -0.7496336698532104, "bits_per_byte_corr": 1.081492777981398}, "model_output": [{"sum_logits": -1.1077287197113037, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.1077287197113037, "logits_per_char": -0.5538643598556519, "bits_per_byte": 0.7990573652894633, "num_chars": 2}, {"sum_logits": -1.2857892513275146, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.2857892513275146, "logits_per_char": -0.6428946256637573, "bits_per_byte": 0.9275008882599356, "num_chars": 2}, {"sum_logits": -1.499267339706421, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.499267339706421, "logits_per_char": -0.7496336698532104, "bits_per_byte": 1.081492777981398, "num_chars": 2}, {"sum_logits": -1.9091413021087646, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.9091413021087646, "logits_per_char": -0.9545706510543823, "bits_per_byte": 1.3771543444552958, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 151, "native_id": "Mercury_400811", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7023288011550903, "logits_per_token_corr": -1.7023288011550903, "logits_per_char_corr": -0.8511644005775452, "bits_per_byte_corr": 1.2279706596953337}, "model_output": [{"sum_logits": -1.1784907579421997, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.1784907579421997, "logits_per_char": -0.5892453789710999, "bits_per_byte": 0.8501013861089544, "num_chars": 2}, {"sum_logits": -1.3634082078933716, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3634082078933716, "logits_per_char": -0.6817041039466858, "bits_per_byte": 0.9834911301182446, "num_chars": 2}, {"sum_logits": -1.50110924243927, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.50110924243927, "logits_per_char": -0.750554621219635, "bits_per_byte": 1.0828214299506398, "num_chars": 2}, {"sum_logits": -1.7023288011550903, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.7023288011550903, "logits_per_char": -0.8511644005775452, "bits_per_byte": 1.2279706596953337, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 152, "native_id": "Mercury_SC_400062", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.1166341304779053, "logits_per_token_corr": -2.1166341304779053, "logits_per_char_corr": -1.0583170652389526, "bits_per_byte_corr": 1.5268287817094954}, "model_output": [{"sum_logits": -1.2340734004974365, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.2340734004974365, "logits_per_char": -0.6170367002487183, "bits_per_byte": 0.8901957874959552, "num_chars": 2}, {"sum_logits": -1.1128137111663818, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.1128137111663818, "logits_per_char": -0.5564068555831909, "bits_per_byte": 0.8027254112670679, "num_chars": 2}, {"sum_logits": -1.4058473110198975, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4058473110198975, "logits_per_char": -0.7029236555099487, "bits_per_byte": 1.0141044719284737, "num_chars": 2}, {"sum_logits": -2.1166341304779053, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -2.1166341304779053, "logits_per_char": -1.0583170652389526, "bits_per_byte": 1.5268287817094954, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 153, "native_id": "Mercury_400699", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.261864423751831, "logits_per_token_corr": -1.261864423751831, "logits_per_char_corr": -0.6309322118759155, "bits_per_byte_corr": 0.910242773211142}, "model_output": [{"sum_logits": -1.585432767868042, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.585432767868042, "logits_per_char": -0.792716383934021, "bits_per_byte": 1.1436479959338652, "num_chars": 2}, {"sum_logits": -1.261864423751831, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.261864423751831, "logits_per_char": -0.6309322118759155, "bits_per_byte": 0.910242773211142, "num_chars": 2}, {"sum_logits": -1.3264548778533936, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.3264548778533936, "logits_per_char": -0.6632274389266968, "bits_per_byte": 0.9568349371217207, "num_chars": 2}, {"sum_logits": -1.4590117931365967, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4590117931365967, "logits_per_char": -0.7295058965682983, "bits_per_byte": 1.0524545392790972, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 154, "native_id": "Mercury_7029803", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.932241678237915, "logits_per_token_corr": -0.932241678237915, "logits_per_char_corr": -0.4661208391189575, "bits_per_byte_corr": 0.6724702230524054}, "model_output": [{"sum_logits": -1.4721128940582275, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4721128940582275, "logits_per_char": -0.7360564470291138, "bits_per_byte": 1.0619049859440153, "num_chars": 2}, {"sum_logits": -0.932241678237915, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -0.932241678237915, "logits_per_char": -0.4661208391189575, "bits_per_byte": 0.6724702230524054, "num_chars": 2}, {"sum_logits": -1.3546583652496338, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3546583652496338, "logits_per_char": -0.6773291826248169, "bits_per_byte": 0.9771794528229004, "num_chars": 2}, {"sum_logits": -2.204885721206665, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -2.204885721206665, "logits_per_char": -1.1024428606033325, "bits_per_byte": 1.5904888478570132, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 155, "native_id": "Mercury_SC_401372", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.913851261138916, "logits_per_token_corr": -1.913851261138916, "logits_per_char_corr": -0.956925630569458, "bits_per_byte_corr": 1.3805518617230932}, "model_output": [{"sum_logits": -1.2918553352355957, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.2918553352355957, "logits_per_char": -0.6459276676177979, "bits_per_byte": 0.9318766428458412, "num_chars": 2}, {"sum_logits": -1.2552685737609863, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.2552685737609863, "logits_per_char": -0.6276342868804932, "bits_per_byte": 0.905484873175019, "num_chars": 2}, {"sum_logits": -1.3168931007385254, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3168931007385254, "logits_per_char": -0.6584465503692627, "bits_per_byte": 0.9499375729088628, "num_chars": 2}, {"sum_logits": -1.913851261138916, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.913851261138916, "logits_per_char": -0.956925630569458, "bits_per_byte": 1.3805518617230932, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 156, "native_id": "Mercury_7271128", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1893668174743652, "logits_per_token_corr": -1.1893668174743652, "logits_per_char_corr": -0.5946834087371826, "bits_per_byte_corr": 0.8579468046846942}, "model_output": [{"sum_logits": -1.3364558219909668, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3364558219909668, "logits_per_char": -0.6682279109954834, "bits_per_byte": 0.9640490933774681, "num_chars": 2}, {"sum_logits": -1.452928066253662, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.452928066253662, "logits_per_char": -0.726464033126831, "bits_per_byte": 1.0480660579770278, "num_chars": 2}, {"sum_logits": -1.1893668174743652, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.1893668174743652, "logits_per_char": -0.5946834087371826, "bits_per_byte": 0.8579468046846942, "num_chars": 2}, {"sum_logits": -1.7343878746032715, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.7343878746032715, "logits_per_char": -0.8671939373016357, "bits_per_byte": 1.2510963928349434, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 157, "native_id": "Mercury_407260", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1147668361663818, "logits_per_token_corr": -1.1147668361663818, "logits_per_char_corr": -0.5573834180831909, "bits_per_byte_corr": 0.804134293142937}, "model_output": [{"sum_logits": -1.2551839351654053, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.2551839351654053, "logits_per_char": -0.6275919675827026, "bits_per_byte": 0.9054238193339627, "num_chars": 2}, {"sum_logits": -1.1147668361663818, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.1147668361663818, "logits_per_char": -0.5573834180831909, "bits_per_byte": 0.804134293142937, "num_chars": 2}, {"sum_logits": -1.507220983505249, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.507220983505249, "logits_per_char": -0.7536104917526245, "bits_per_byte": 1.0872301192141856, "num_chars": 2}, {"sum_logits": -1.90860915184021, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.90860915184021, "logits_per_char": -0.954304575920105, "bits_per_byte": 1.3767704791785698, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 158, "native_id": "Mercury_SC_416155", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2822619676589966, "logits_per_token_corr": -1.2822619676589966, "logits_per_char_corr": -0.6411309838294983, "bits_per_byte_corr": 0.9249564909317439}, "model_output": [{"sum_logits": -1.405417561531067, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.405417561531067, "logits_per_char": -0.7027087807655334, "bits_per_byte": 1.0137944732002933, "num_chars": 2}, {"sum_logits": -1.3412028551101685, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3412028551101685, "logits_per_char": -0.6706014275550842, "bits_per_byte": 0.9674733539474746, "num_chars": 2}, {"sum_logits": -1.2822619676589966, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.2822619676589966, "logits_per_char": -0.6411309838294983, "bits_per_byte": 0.9249564909317439, "num_chars": 2}, {"sum_logits": -1.6275111436843872, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.6275111436843872, "logits_per_char": -0.8137555718421936, "bits_per_byte": 1.1740011279933387, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 159, "native_id": "Mercury_402145", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1866142749786377, "logits_per_token_corr": -1.1866142749786377, "logits_per_char_corr": -0.5933071374893188, "bits_per_byte_corr": 0.8559612649804816}, "model_output": [{"sum_logits": -1.6353771686553955, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.6353771686553955, "logits_per_char": -0.8176885843276978, "bits_per_byte": 1.179675265601934, "num_chars": 2}, {"sum_logits": -1.1866142749786377, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.1866142749786377, "logits_per_char": -0.5933071374893188, "bits_per_byte": 0.8559612649804816, "num_chars": 2}, {"sum_logits": -1.4122841358184814, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4122841358184814, "logits_per_char": -0.7061420679092407, "bits_per_byte": 1.018747659536471, "num_chars": 2}, {"sum_logits": -1.4243099689483643, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4243099689483643, "logits_per_char": -0.7121549844741821, "bits_per_byte": 1.0274224644459975, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 160, "native_id": "AIMS_2009_4_5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4750843048095703, "logits_per_token_corr": -1.4750843048095703, "logits_per_char_corr": -0.7375421524047852, "bits_per_byte_corr": 1.06404840572172}, "model_output": [{"sum_logits": -1.4750843048095703, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4750843048095703, "logits_per_char": -0.7375421524047852, "bits_per_byte": 1.06404840572172, "num_chars": 2}, {"sum_logits": -1.2258243560791016, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.2258243560791016, "logits_per_char": -0.6129121780395508, "bits_per_byte": 0.8842453597587486, "num_chars": 2}, {"sum_logits": -1.2593498229980469, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.2593498229980469, "logits_per_char": -0.6296749114990234, "bits_per_byte": 0.9084288721924907, "num_chars": 2}, {"sum_logits": -1.6931037902832031, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.6931037902832031, "logits_per_char": -0.8465518951416016, "bits_per_byte": 1.2213162209768198, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 161, "native_id": "TIMSS_2003_4_pg7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7158806324005127, "logits_per_token_corr": -1.7158806324005127, "logits_per_char_corr": -0.8579403162002563, "bits_per_byte_corr": 1.2377462395617083}, "model_output": [{"sum_logits": -0.9836685657501221, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -0.9836685657501221, "logits_per_char": -0.49183428287506104, "bits_per_byte": 0.7095668808435399, "num_chars": 2}, {"sum_logits": -1.5260822772979736, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.5260822772979736, "logits_per_char": -0.7630411386489868, "bits_per_byte": 1.100835666723952, "num_chars": 2}, {"sum_logits": -1.549729585647583, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.549729585647583, "logits_per_char": -0.7748647928237915, "bits_per_byte": 1.1178935939671413, "num_chars": 2}, {"sum_logits": -1.7158806324005127, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.7158806324005127, "logits_per_char": -0.8579403162002563, "bits_per_byte": 1.2377462395617083, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 162, "native_id": "Mercury_7142415", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4711753129959106, "logits_per_token_corr": -1.4711753129959106, "logits_per_char_corr": -0.7355876564979553, "bits_per_byte_corr": 1.0612286641694968}, "model_output": [{"sum_logits": -1.4711753129959106, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.4711753129959106, "logits_per_char": -0.7355876564979553, "bits_per_byte": 1.0612286641694968, "num_chars": 2}, {"sum_logits": -0.9499160051345825, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": true, "logits_per_token": -0.9499160051345825, "logits_per_char": -0.47495800256729126, "bits_per_byte": 0.685219554934851, "num_chars": 2}, {"sum_logits": -1.4712382555007935, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.4712382555007935, "logits_per_char": -0.7356191277503967, "bits_per_byte": 1.0612740675893246, "num_chars": 2}, {"sum_logits": -1.9638549089431763, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.9638549089431763, "logits_per_char": -0.9819274544715881, "bits_per_byte": 1.4166218690799013, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 163, "native_id": "Mercury_7212818", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.023303508758545, "logits_per_token_corr": -1.023303508758545, "logits_per_char_corr": -0.5116517543792725, "bits_per_byte_corr": 0.7381574487056447}, "model_output": [{"sum_logits": -1.2131648063659668, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.2131648063659668, "logits_per_char": -0.6065824031829834, "bits_per_byte": 0.8751134249632286, "num_chars": 2}, {"sum_logits": -1.023303508758545, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.023303508758545, "logits_per_char": -0.5116517543792725, "bits_per_byte": 0.7381574487056447, "num_chars": 2}, {"sum_logits": -1.509294033050537, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.509294033050537, "logits_per_char": -0.7546470165252686, "bits_per_byte": 1.0887255083634388, "num_chars": 2}, {"sum_logits": -2.1817564964294434, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -2.1817564964294434, "logits_per_char": -1.0908782482147217, "bits_per_byte": 1.5738046389141493, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 164, "native_id": "Mercury_SC_413299", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4319573640823364, "logits_per_token_corr": -1.4319573640823364, "logits_per_char_corr": -0.7159786820411682, "bits_per_byte_corr": 1.0329388939637514}, "model_output": [{"sum_logits": -1.4319573640823364, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.4319573640823364, "logits_per_char": -0.7159786820411682, "bits_per_byte": 1.0329388939637514, "num_chars": 2}, {"sum_logits": -1.407720685005188, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.407720685005188, "logits_per_char": -0.703860342502594, "bits_per_byte": 1.0154558256076291, "num_chars": 2}, {"sum_logits": -1.3523236513137817, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": true, "logits_per_token": -1.3523236513137817, "logits_per_char": -0.6761618256568909, "bits_per_byte": 0.9754953127143251, "num_chars": 2}, {"sum_logits": -1.4286309480667114, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.4286309480667114, "logits_per_char": -0.7143154740333557, "bits_per_byte": 1.0305393920189119, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 165, "native_id": "Mercury_7132020", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2395446300506592, "logits_per_token_corr": -1.2395446300506592, "logits_per_char_corr": -0.6197723150253296, "bits_per_byte_corr": 0.8941424453679577}, "model_output": [{"sum_logits": -1.2395446300506592, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.2395446300506592, "logits_per_char": -0.6197723150253296, "bits_per_byte": 0.8941424453679577, "num_chars": 2}, {"sum_logits": -1.3598887920379639, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3598887920379639, "logits_per_char": -0.6799443960189819, "bits_per_byte": 0.9809524082175314, "num_chars": 2}, {"sum_logits": -1.4581515789031982, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4581515789031982, "logits_per_char": -0.7290757894515991, "bits_per_byte": 1.0518340258747838, "num_chars": 2}, {"sum_logits": -1.6459319591522217, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.6459319591522217, "logits_per_char": -0.8229659795761108, "bits_per_byte": 1.1872889375556361, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 166, "native_id": "MEA_2014_8_10", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.524430274963379, "logits_per_token_corr": -1.524430274963379, "logits_per_char_corr": -0.7622151374816895, "bits_per_byte_corr": 1.0996439989361229}, "model_output": [{"sum_logits": -1.1250505447387695, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.1250505447387695, "logits_per_char": -0.5625252723693848, "bits_per_byte": 0.8115524208226079, "num_chars": 2}, {"sum_logits": -1.1170110702514648, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.1170110702514648, "logits_per_char": -0.5585055351257324, "bits_per_byte": 0.8057531658355098, "num_chars": 2}, {"sum_logits": -1.524430274963379, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.524430274963379, "logits_per_char": -0.7622151374816895, "bits_per_byte": 1.0996439989361229, "num_chars": 2}, {"sum_logits": -2.114556312561035, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -2.114556312561035, "logits_per_char": -1.0572781562805176, "bits_per_byte": 1.525329952907225, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 167, "native_id": "TIMSS_1995_8_N2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1388146877288818, "logits_per_token_corr": -1.1388146877288818, "logits_per_char_corr": -0.5694073438644409, "bits_per_byte_corr": 0.8214811512395758}, "model_output": [{"sum_logits": -1.6924645900726318, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.6924645900726318, "logits_per_char": -0.8462322950363159, "bits_per_byte": 1.2208551354898562, "num_chars": 2}, {"sum_logits": -1.1388146877288818, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.1388146877288818, "logits_per_char": -0.5694073438644409, "bits_per_byte": 0.8214811512395758, "num_chars": 2}, {"sum_logits": -1.422513723373413, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.422513723373413, "logits_per_char": -0.7112568616867065, "bits_per_byte": 1.026126747154396, "num_chars": 2}, {"sum_logits": -1.4136598110198975, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4136598110198975, "logits_per_char": -0.7068299055099487, "bits_per_byte": 1.0197399994319503, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 168, "native_id": "Mercury_7024465", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2050968408584595, "logits_per_token_corr": -1.2050968408584595, "logits_per_char_corr": -0.6025484204292297, "bits_per_byte_corr": 0.8692936180493525}, "model_output": [{"sum_logits": -1.2050968408584595, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.2050968408584595, "logits_per_char": -0.6025484204292297, "bits_per_byte": 0.8692936180493525, "num_chars": 2}, {"sum_logits": -1.1915642023086548, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.1915642023086548, "logits_per_char": -0.5957821011543274, "bits_per_byte": 0.8595318827863724, "num_chars": 2}, {"sum_logits": -1.496604561805725, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.496604561805725, "logits_per_char": -0.7483022809028625, "bits_per_byte": 1.0795719897452354, "num_chars": 2}, {"sum_logits": -1.869327187538147, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.869327187538147, "logits_per_char": -0.9346635937690735, "bits_per_byte": 1.3484345316310677, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 169, "native_id": "Mercury_SC_415762", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.047135829925537, "logits_per_token_corr": -2.047135829925537, "logits_per_char_corr": -1.0235679149627686, "bits_per_byte_corr": 1.4766963549309033}, "model_output": [{"sum_logits": -1.2616171836853027, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.2616171836853027, "logits_per_char": -0.6308085918426514, "bits_per_byte": 0.9100644272021972, "num_chars": 2}, {"sum_logits": -1.2001795768737793, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.2001795768737793, "logits_per_char": -0.6000897884368896, "bits_per_byte": 0.8657465618666299, "num_chars": 2}, {"sum_logits": -1.2839007377624512, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.2839007377624512, "logits_per_char": -0.6419503688812256, "bits_per_byte": 0.9261386136824502, "num_chars": 2}, {"sum_logits": -2.047135829925537, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -2.047135829925537, "logits_per_char": -1.0235679149627686, "bits_per_byte": 1.4766963549309033, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 170, "native_id": "Mercury_415093", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5559831857681274, "logits_per_token_corr": -1.5559831857681274, "logits_per_char_corr": -0.7779915928840637, "bits_per_byte_corr": 1.1224046129079503}, "model_output": [{"sum_logits": -1.7198225259780884, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.7198225259780884, "logits_per_char": -0.8599112629890442, "bits_per_byte": 1.2405897147197507, "num_chars": 2}, {"sum_logits": -1.276466965675354, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.276466965675354, "logits_per_char": -0.638233482837677, "bits_per_byte": 0.9207762806198695, "num_chars": 2}, {"sum_logits": -1.157854676246643, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": true, "logits_per_token": -1.157854676246643, "logits_per_char": -0.5789273381233215, "bits_per_byte": 0.8352155997461641, "num_chars": 2}, {"sum_logits": -1.5559831857681274, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.5559831857681274, "logits_per_char": -0.7779915928840637, "bits_per_byte": 1.1224046129079503, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 171, "native_id": "LEAP_2005_8_10404", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8775283098220825, "logits_per_token_corr": -1.8775283098220825, "logits_per_char_corr": -0.9387641549110413, "bits_per_byte_corr": 1.3543503908554508}, "model_output": [{"sum_logits": -1.5688315629959106, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.5688315629959106, "logits_per_char": -0.7844157814979553, "bits_per_byte": 1.131672757962954, "num_chars": 2}, {"sum_logits": -1.0917960405349731, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.0917960405349731, "logits_per_char": -0.5458980202674866, "bits_per_byte": 0.7875643666715715, "num_chars": 2}, {"sum_logits": -1.2330924272537231, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.2330924272537231, "logits_per_char": -0.6165462136268616, "bits_per_byte": 0.8894881648789796, "num_chars": 2}, {"sum_logits": -1.8775283098220825, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.8775283098220825, "logits_per_char": -0.9387641549110413, "bits_per_byte": 1.3543503908554508, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 172, "native_id": "AIMS_2008_8_6", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.9827288389205933, "logits_per_token_corr": -0.9827288389205933, "logits_per_char_corr": -0.49136441946029663, "bits_per_byte_corr": 0.7088890112251637}, "model_output": [{"sum_logits": -1.4119547605514526, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4119547605514526, "logits_per_char": -0.7059773802757263, "bits_per_byte": 1.018510065504304, "num_chars": 2}, {"sum_logits": -0.9827288389205933, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -0.9827288389205933, "logits_per_char": -0.49136441946029663, "bits_per_byte": 0.7088890112251637, "num_chars": 2}, {"sum_logits": -1.4098337888717651, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4098337888717651, "logits_per_char": -0.7049168944358826, "bits_per_byte": 1.0169801078422274, "num_chars": 2}, {"sum_logits": -2.0764713287353516, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -2.0764713287353516, "logits_per_char": -1.0382356643676758, "bits_per_byte": 1.4978574442583803, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 173, "native_id": "Mercury_7057173", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9868338108062744, "logits_per_token_corr": -1.9868338108062744, "logits_per_char_corr": -0.9934169054031372, "bits_per_byte_corr": 1.4331976429613962}, "model_output": [{"sum_logits": -1.4227488040924072, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4227488040924072, "logits_per_char": -0.7113744020462036, "bits_per_byte": 1.026296322048147, "num_chars": 2}, {"sum_logits": -0.996189296245575, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -0.996189296245575, "logits_per_char": -0.4980946481227875, "bits_per_byte": 0.718598678740595, "num_chars": 2}, {"sum_logits": -1.4201796054840088, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4201796054840088, "logits_per_char": -0.7100898027420044, "bits_per_byte": 1.024443037002448, "num_chars": 2}, {"sum_logits": -1.9868338108062744, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.9868338108062744, "logits_per_char": -0.9934169054031372, "bits_per_byte": 1.4331976429613962, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 174, "native_id": "TIMSS_2007_8_pg60", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2920777797698975, "logits_per_token_corr": -1.2920777797698975, "logits_per_char_corr": -0.6460388898849487, "bits_per_byte_corr": 0.9320371026590962}, "model_output": [{"sum_logits": -1.8707692623138428, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.8707692623138428, "logits_per_char": -0.9353846311569214, "bits_per_byte": 1.349474768694812, "num_chars": 2}, {"sum_logits": -1.2641446590423584, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.2641446590423584, "logits_per_char": -0.6320723295211792, "bits_per_byte": 0.9118876152839951, "num_chars": 2}, {"sum_logits": -1.2920777797698975, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.2920777797698975, "logits_per_char": -0.6460388898849487, "bits_per_byte": 0.9320371026590962, "num_chars": 2}, {"sum_logits": -1.317075490951538, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.317075490951538, "logits_per_char": -0.658537745475769, "bits_per_byte": 0.950069139636773, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 175, "native_id": "AIMS_2009_8_14", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3423495292663574, "logits_per_token_corr": -1.3423495292663574, "logits_per_char_corr": -0.6711747646331787, "bits_per_byte_corr": 0.9683005045067998}, "model_output": [{"sum_logits": -1.3423495292663574, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.3423495292663574, "logits_per_char": -0.6711747646331787, "bits_per_byte": 0.9683005045067998, "num_chars": 2}, {"sum_logits": -1.111060619354248, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": true, "logits_per_token": -1.111060619354248, "logits_per_char": -0.555530309677124, "bits_per_byte": 0.8014608228352728, "num_chars": 2}, {"sum_logits": -1.3145499229431152, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.3145499229431152, "logits_per_char": -0.6572749614715576, "bits_per_byte": 0.9482473274161819, "num_chars": 2}, {"sum_logits": -2.1086935997009277, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -2.1086935997009277, "logits_per_char": -1.0543467998504639, "bits_per_byte": 1.5211008995225055, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 176, "native_id": "Mercury_185010", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7348711490631104, "logits_per_token_corr": -1.7348711490631104, "logits_per_char_corr": -0.8674355745315552, "bits_per_byte_corr": 1.2514450016682426}, "model_output": [{"sum_logits": -1.7348711490631104, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.7348711490631104, "logits_per_char": -0.8674355745315552, "bits_per_byte": 1.2514450016682426, "num_chars": 2}, {"sum_logits": -1.0629770755767822, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.0629770755767822, "logits_per_char": -0.5314885377883911, "bits_per_byte": 0.7667758777571891, "num_chars": 2}, {"sum_logits": -1.3888704776763916, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3888704776763916, "logits_per_char": -0.6944352388381958, "bits_per_byte": 1.0018582752911278, "num_chars": 2}, {"sum_logits": -1.5369951725006104, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.5369951725006104, "logits_per_char": -0.7684975862503052, "bits_per_byte": 1.1087076566192502, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 177, "native_id": "Mercury_7206938", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3378838300704956, "logits_per_token_corr": -1.3378838300704956, "logits_per_char_corr": -0.6689419150352478, "bits_per_byte_corr": 0.9650791834648117}, "model_output": [{"sum_logits": -1.3378838300704956, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.3378838300704956, "logits_per_char": -0.6689419150352478, "bits_per_byte": 0.9650791834648117, "num_chars": 2}, {"sum_logits": -1.246712565422058, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.246712565422058, "logits_per_char": -0.623356282711029, "bits_per_byte": 0.8993130177748264, "num_chars": 2}, {"sum_logits": -1.2335747480392456, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.2335747480392456, "logits_per_char": -0.6167873740196228, "bits_per_byte": 0.8898360857816754, "num_chars": 2}, {"sum_logits": -1.9449585676193237, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.9449585676193237, "logits_per_char": -0.9724792838096619, "bits_per_byte": 1.402991040120458, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 178, "native_id": "Mercury_402501", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2249194383621216, "logits_per_token_corr": -1.2249194383621216, "logits_per_char_corr": -0.6124597191810608, "bits_per_byte_corr": 0.8835925996073983}, "model_output": [{"sum_logits": -1.6750212907791138, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.6750212907791138, "logits_per_char": -0.8375106453895569, "bits_per_byte": 1.208272454796097, "num_chars": 2}, {"sum_logits": -1.2249194383621216, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.2249194383621216, "logits_per_char": -0.6124597191810608, "bits_per_byte": 0.8835925996073983, "num_chars": 2}, {"sum_logits": -1.2141410112380981, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.2141410112380981, "logits_per_char": -0.6070705056190491, "bits_per_byte": 0.8758176079271869, "num_chars": 2}, {"sum_logits": -1.5799590349197388, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.5799590349197388, "logits_per_char": -0.7899795174598694, "bits_per_byte": 1.1396995322440286, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 179, "native_id": "MCAS_2011_8_15365", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.233945608139038, "logits_per_token_corr": -1.233945608139038, "logits_per_char_corr": -0.616972804069519, "bits_per_byte_corr": 0.8901036047950927}, "model_output": [{"sum_logits": -1.598325490951538, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.598325490951538, "logits_per_char": -0.799162745475769, "bits_per_byte": 1.1529481297619293, "num_chars": 2}, {"sum_logits": -1.1231896877288818, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.1231896877288818, "logits_per_char": -0.5615948438644409, "bits_per_byte": 0.8102100962326227, "num_chars": 2}, {"sum_logits": -1.233945608139038, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.233945608139038, "logits_per_char": -0.616972804069519, "bits_per_byte": 0.8901036047950927, "num_chars": 2}, {"sum_logits": -1.7845113277435303, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.7845113277435303, "logits_per_char": -0.8922556638717651, "bits_per_byte": 1.2872528214738101, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 180, "native_id": "Mercury_SC_401766", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.457045555114746, "logits_per_token_corr": -1.457045555114746, "logits_per_char_corr": -0.728522777557373, "bits_per_byte_corr": 1.0510361983574306}, "model_output": [{"sum_logits": -1.457045555114746, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.457045555114746, "logits_per_char": -0.728522777557373, "bits_per_byte": 1.0510361983574306, "num_chars": 2}, {"sum_logits": -1.0818052291870117, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.0818052291870117, "logits_per_char": -0.5409026145935059, "bits_per_byte": 0.7803575196784858, "num_chars": 2}, {"sum_logits": -1.4331045150756836, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4331045150756836, "logits_per_char": -0.7165522575378418, "bits_per_byte": 1.0337663884883783, "num_chars": 2}, {"sum_logits": -1.722275733947754, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.722275733947754, "logits_per_char": -0.861137866973877, "bits_per_byte": 1.2423593302058047, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 181, "native_id": "Mercury_7162400", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5232092142105103, "logits_per_token_corr": -1.5232092142105103, "logits_per_char_corr": -0.7616046071052551, "bits_per_byte_corr": 1.0987631897897283}, "model_output": [{"sum_logits": -1.6090360879898071, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.6090360879898071, "logits_per_char": -0.8045180439949036, "bits_per_byte": 1.1606741923779702, "num_chars": 2}, {"sum_logits": -1.2825857400894165, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.2825857400894165, "logits_per_char": -0.6412928700447083, "bits_per_byte": 0.9251900433716158, "num_chars": 2}, {"sum_logits": -1.2457491159439087, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": true, "logits_per_token": -1.2457491159439087, "logits_per_char": -0.6228745579719543, "bits_per_byte": 0.8986180358826893, "num_chars": 2}, {"sum_logits": -1.5232092142105103, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.5232092142105103, "logits_per_char": -0.7616046071052551, "bits_per_byte": 1.0987631897897283, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 182, "native_id": "Mercury_7086695", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.282909870147705, "logits_per_token_corr": -1.282909870147705, "logits_per_char_corr": -0.6414549350738525, "bits_per_byte_corr": 0.9254238537854639}, "model_output": [{"sum_logits": -1.4126152992248535, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4126152992248535, "logits_per_char": -0.7063076496124268, "bits_per_byte": 1.0189865434385197, "num_chars": 2}, {"sum_logits": -1.240736484527588, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.240736484527588, "logits_per_char": -0.620368242263794, "bits_per_byte": 0.8950021866396216, "num_chars": 2}, {"sum_logits": -1.282909870147705, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.282909870147705, "logits_per_char": -0.6414549350738525, "bits_per_byte": 0.9254238537854639, "num_chars": 2}, {"sum_logits": -1.7326264381408691, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.7326264381408691, "logits_per_char": -0.8663132190704346, "bits_per_byte": 1.249825785010368, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 183, "native_id": "Mercury_SC_402994", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4160524606704712, "logits_per_token_corr": -1.4160524606704712, "logits_per_char_corr": -0.7080262303352356, "bits_per_byte_corr": 1.0214659313246852}, "model_output": [{"sum_logits": -1.4426466226577759, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4426466226577759, "logits_per_char": -0.7213233113288879, "bits_per_byte": 1.0406495641325402, "num_chars": 2}, {"sum_logits": -1.0454565286636353, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.0454565286636353, "logits_per_char": -0.5227282643318176, "bits_per_byte": 0.7541374746845503, "num_chars": 2}, {"sum_logits": -1.4160524606704712, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4160524606704712, "logits_per_char": -0.7080262303352356, "bits_per_byte": 1.0214659313246852, "num_chars": 2}, {"sum_logits": -1.8383280038833618, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.8383280038833618, "logits_per_char": -0.9191640019416809, "bits_per_byte": 1.3260733473658692, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 184, "native_id": "Mercury_7056298", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4940485954284668, "logits_per_token_corr": -1.4940485954284668, "logits_per_char_corr": -0.7470242977142334, "bits_per_byte_corr": 1.0777282497366594}, "model_output": [{"sum_logits": -1.4940485954284668, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4940485954284668, "logits_per_char": -0.7470242977142334, "bits_per_byte": 1.0777282497366594, "num_chars": 2}, {"sum_logits": -1.1529784202575684, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.1529784202575684, "logits_per_char": -0.5764892101287842, "bits_per_byte": 0.8316981245793901, "num_chars": 2}, {"sum_logits": -1.3449187278747559, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.3449187278747559, "logits_per_char": -0.6724593639373779, "bits_per_byte": 0.9701537895524988, "num_chars": 2}, {"sum_logits": -1.6812529563903809, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.6812529563903809, "logits_per_char": -0.8406264781951904, "bits_per_byte": 1.2127676513330268, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 185, "native_id": "Mercury_409115", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4829964637756348, "logits_per_token_corr": -1.4829964637756348, "logits_per_char_corr": -0.7414982318878174, "bits_per_byte_corr": 1.0697558219732572}, "model_output": [{"sum_logits": -1.4786343574523926, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.4786343574523926, "logits_per_char": -0.7393171787261963, "bits_per_byte": 1.0666092273930692, "num_chars": 2}, {"sum_logits": -1.1675763130187988, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": true, "logits_per_token": -1.1675763130187988, "logits_per_char": -0.5837881565093994, "bits_per_byte": 0.8422282783264257, "num_chars": 2}, {"sum_logits": -1.4829964637756348, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.4829964637756348, "logits_per_char": -0.7414982318878174, "bits_per_byte": 1.0697558219732572, "num_chars": 2}, {"sum_logits": -1.5333237648010254, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.5333237648010254, "logits_per_char": -0.7666618824005127, "bits_per_byte": 1.106059295778612, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 186, "native_id": "Mercury_409647", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7671964168548584, "logits_per_token_corr": -1.7671964168548584, "logits_per_char_corr": -0.8835982084274292, "bits_per_byte_corr": 1.2747627534375405}, "model_output": [{"sum_logits": -1.5006520748138428, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": false, "logits_per_token": -1.5006520748138428, "logits_per_char": -0.7503260374069214, "bits_per_byte": 1.08249165321761, "num_chars": 2}, {"sum_logits": -1.085071325302124, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": true, "logits_per_token": -1.085071325302124, "logits_per_char": -0.542535662651062, "bits_per_byte": 0.7827135100126571, "num_chars": 2}, {"sum_logits": -1.3803269863128662, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": false, "logits_per_token": -1.3803269863128662, "logits_per_char": -0.6901634931564331, "bits_per_byte": 0.9956954489801054, "num_chars": 2}, {"sum_logits": -1.7671964168548584, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": false, "logits_per_token": -1.7671964168548584, "logits_per_char": -0.8835982084274292, "bits_per_byte": 1.2747627534375405, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 187, "native_id": "Mercury_414352", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3272091150283813, "logits_per_token_corr": -1.3272091150283813, "logits_per_char_corr": -0.6636045575141907, "bits_per_byte_corr": 0.9573790042377256}, "model_output": [{"sum_logits": -1.3272091150283813, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.3272091150283813, "logits_per_char": -0.6636045575141907, "bits_per_byte": 0.9573790042377256, "num_chars": 2}, {"sum_logits": -1.0715728998184204, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.0715728998184204, "logits_per_char": -0.5357864499092102, "bits_per_byte": 0.7729764542600759, "num_chars": 2}, {"sum_logits": -1.5124584436416626, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.5124584436416626, "logits_per_char": -0.7562292218208313, "bits_per_byte": 1.091008148097017, "num_chars": 2}, {"sum_logits": -1.8772941827774048, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.8772941827774048, "logits_per_char": -0.9386470913887024, "bits_per_byte": 1.3541815038923035, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 188, "native_id": "Mercury_185325", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2763946056365967, "logits_per_token_corr": -1.2763946056365967, "logits_per_char_corr": -0.6381973028182983, "bits_per_byte_corr": 0.9207240838853326}, "model_output": [{"sum_logits": -1.4178264141082764, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4178264141082764, "logits_per_char": -0.7089132070541382, "bits_per_byte": 1.0227455682384308, "num_chars": 2}, {"sum_logits": -1.2389838695526123, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.2389838695526123, "logits_per_char": -0.6194919347763062, "bits_per_byte": 0.8937379421731282, "num_chars": 2}, {"sum_logits": -1.2763946056365967, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.2763946056365967, "logits_per_char": -0.6381973028182983, "bits_per_byte": 0.9207240838853326, "num_chars": 2}, {"sum_logits": -1.745518445968628, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.745518445968628, "logits_per_char": -0.872759222984314, "bits_per_byte": 1.2591254028904795, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 189, "native_id": "Mercury_SC_412374", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3284437656402588, "logits_per_token_corr": -1.3284437656402588, "logits_per_char_corr": -0.6642218828201294, "bits_per_byte_corr": 0.9582696163952193}, "model_output": [{"sum_logits": -1.3955614566802979, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3955614566802979, "logits_per_char": -0.6977807283401489, "bits_per_byte": 1.006684796404945, "num_chars": 2}, {"sum_logits": -1.123810052871704, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.123810052871704, "logits_per_char": -0.561905026435852, "bits_per_byte": 0.810657595090168, "num_chars": 2}, {"sum_logits": -1.3284437656402588, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3284437656402588, "logits_per_char": -0.6642218828201294, "bits_per_byte": 0.9582696163952193, "num_chars": 2}, {"sum_logits": -1.893799066543579, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.893799066543579, "logits_per_char": -0.9468995332717896, "bits_per_byte": 1.3660872608722663, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 190, "native_id": "Mercury_SC_401818", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0567214488983154, "logits_per_token_corr": -1.0567214488983154, "logits_per_char_corr": -0.5283607244491577, "bits_per_byte_corr": 0.7622633969638476}, "model_output": [{"sum_logits": -1.8797423839569092, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.8797423839569092, "logits_per_char": -0.9398711919784546, "bits_per_byte": 1.3559475077426895, "num_chars": 2}, {"sum_logits": -1.2297751903533936, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.2297751903533936, "logits_per_char": -0.6148875951766968, "bits_per_byte": 0.8870952842661982, "num_chars": 2}, {"sum_logits": -1.0567214488983154, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.0567214488983154, "logits_per_char": -0.5283607244491577, "bits_per_byte": 0.7622633969638476, "num_chars": 2}, {"sum_logits": -1.6499431133270264, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.6499431133270264, "logits_per_char": -0.8249715566635132, "bits_per_byte": 1.190182373673754, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 191, "native_id": "Mercury_SC_413549", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3446948528289795, "logits_per_token_corr": -1.3446948528289795, "logits_per_char_corr": -0.6723474264144897, "bits_per_byte_corr": 0.9699922978433385}, "model_output": [{"sum_logits": -1.4708411693572998, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4708411693572998, "logits_per_char": -0.7354205846786499, "bits_per_byte": 1.0609876304843124, "num_chars": 2}, {"sum_logits": -1.3446948528289795, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.3446948528289795, "logits_per_char": -0.6723474264144897, "bits_per_byte": 0.9699922978433385, "num_chars": 2}, {"sum_logits": -1.4251773357391357, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4251773357391357, "logits_per_char": -0.7125886678695679, "bits_per_byte": 1.0280481373298367, "num_chars": 2}, {"sum_logits": -1.5283229351043701, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.5283229351043701, "logits_per_char": -0.7641614675521851, "bits_per_byte": 1.102451959676762, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 192, "native_id": "Mercury_7093958", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4452552795410156, "logits_per_token_corr": -1.4452552795410156, "logits_per_char_corr": -0.7226276397705078, "bits_per_byte_corr": 1.0425313123069568}, "model_output": [{"sum_logits": -1.4452552795410156, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4452552795410156, "logits_per_char": -0.7226276397705078, "bits_per_byte": 1.0425313123069568, "num_chars": 2}, {"sum_logits": -1.197723388671875, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.197723388671875, "logits_per_char": -0.5988616943359375, "bits_per_byte": 0.8639747965974399, "num_chars": 2}, {"sum_logits": -1.3442249298095703, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3442249298095703, "logits_per_char": -0.6721124649047852, "bits_per_byte": 0.9696533200384876, "num_chars": 2}, {"sum_logits": -1.665719985961914, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.665719985961914, "logits_per_char": -0.832859992980957, "bits_per_byte": 1.2015629816293067, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 193, "native_id": "Mercury_7102323", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8396371603012085, "logits_per_token_corr": -1.8396371603012085, "logits_per_char_corr": -0.9198185801506042, "bits_per_byte_corr": 1.3270177041017577}, "model_output": [{"sum_logits": -1.2934545278549194, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.2934545278549194, "logits_per_char": -0.6467272639274597, "bits_per_byte": 0.9330302164765042, "num_chars": 2}, {"sum_logits": -1.3044694662094116, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3044694662094116, "logits_per_char": -0.6522347331047058, "bits_per_byte": 0.9409758149463717, "num_chars": 2}, {"sum_logits": -1.294476866722107, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.294476866722107, "logits_per_char": -0.6472384333610535, "bits_per_byte": 0.9337676780834046, "num_chars": 2}, {"sum_logits": -1.8396371603012085, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.8396371603012085, "logits_per_char": -0.9198185801506042, "bits_per_byte": 1.3270177041017577, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 194, "native_id": "Mercury_7222793", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0792261362075806, "logits_per_token_corr": -1.0792261362075806, "logits_per_char_corr": -0.5396130681037903, "bits_per_byte_corr": 0.7784970973527761}, "model_output": [{"sum_logits": -1.557564377784729, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.557564377784729, "logits_per_char": -0.7787821888923645, "bits_per_byte": 1.1235452018484735, "num_chars": 2}, {"sum_logits": -1.0792261362075806, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": true, "logits_per_token": -1.0792261362075806, "logits_per_char": -0.5396130681037903, "bits_per_byte": 0.7784970973527761, "num_chars": 2}, {"sum_logits": -1.149553894996643, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.149553894996643, "logits_per_char": -0.5747769474983215, "bits_per_byte": 0.8292278517737203, "num_chars": 2}, {"sum_logits": -2.0935521125793457, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -2.0935521125793457, "logits_per_char": -1.0467760562896729, "bits_per_byte": 1.5101786253315024, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 195, "native_id": "Mercury_SC_400701", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.668203592300415, "logits_per_token_corr": -1.668203592300415, "logits_per_char_corr": -0.8341017961502075, "bits_per_byte_corr": 1.203354524903346}, "model_output": [{"sum_logits": -1.668203592300415, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.668203592300415, "logits_per_char": -0.8341017961502075, "bits_per_byte": 1.203354524903346, "num_chars": 2}, {"sum_logits": -1.1825335025787354, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.1825335025787354, "logits_per_char": -0.5912667512893677, "bits_per_byte": 0.8530176099283117, "num_chars": 2}, {"sum_logits": -1.2282640933990479, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.2282640933990479, "logits_per_char": -0.6141320466995239, "bits_per_byte": 0.886005258225029, "num_chars": 2}, {"sum_logits": -1.6130411624908447, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.6130411624908447, "logits_per_char": -0.8065205812454224, "bits_per_byte": 1.163563242938491, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 196, "native_id": "Mercury_409301", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.05851411819458, "logits_per_token_corr": -1.05851411819458, "logits_per_char_corr": -0.52925705909729, "bits_per_byte_corr": 0.763556534515686}, "model_output": [{"sum_logits": -1.35508394241333, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.35508394241333, "logits_per_char": -0.677541971206665, "bits_per_byte": 0.9774864418546908, "num_chars": 2}, {"sum_logits": -1.05851411819458, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": true, "logits_per_token": -1.05851411819458, "logits_per_char": -0.52925705909729, "bits_per_byte": 0.763556534515686, "num_chars": 2}, {"sum_logits": -1.4991211891174316, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.4991211891174316, "logits_per_char": -0.7495605945587158, "bits_per_byte": 1.081387352616419, "num_chars": 2}, {"sum_logits": -1.89011812210083, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.89011812210083, "logits_per_char": -0.945059061050415, "bits_per_byte": 1.3634320207255934, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 197, "native_id": "Mercury_SC_400383", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3449074029922485, "logits_per_token_corr": -1.3449074029922485, "logits_per_char_corr": -0.6724537014961243, "bits_per_byte_corr": 0.9701456203765828}, "model_output": [{"sum_logits": -1.4915214776992798, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4915214776992798, "logits_per_char": -0.7457607388496399, "bits_per_byte": 1.0759053196288377, "num_chars": 2}, {"sum_logits": -1.1066890954971313, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.1066890954971313, "logits_per_char": -0.5533445477485657, "bits_per_byte": 0.7983074349403755, "num_chars": 2}, {"sum_logits": -1.3449074029922485, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3449074029922485, "logits_per_char": -0.6724537014961243, "bits_per_byte": 0.9701456203765828, "num_chars": 2}, {"sum_logits": -1.7897716760635376, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.7897716760635376, "logits_per_char": -0.8948858380317688, "bits_per_byte": 1.2910473606911246, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 198, "native_id": "CSZ_2005_5_CSZ10021", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1070529222488403, "logits_per_token_corr": -1.1070529222488403, "logits_per_char_corr": -0.5535264611244202, "bits_per_byte_corr": 0.7985698804655923}, "model_output": [{"sum_logits": -1.1187907457351685, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.1187907457351685, "logits_per_char": -0.5593953728675842, "bits_per_byte": 0.8070369303328762, "num_chars": 2}, {"sum_logits": -1.1070529222488403, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.1070529222488403, "logits_per_char": -0.5535264611244202, "bits_per_byte": 0.7985698804655923, "num_chars": 2}, {"sum_logits": -1.5185405015945435, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.5185405015945435, "logits_per_char": -0.7592702507972717, "bits_per_byte": 1.0953954255205305, "num_chars": 2}, {"sum_logits": -2.1467695236206055, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -2.1467695236206055, "logits_per_char": -1.0733847618103027, "bits_per_byte": 1.5485668728306174, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 199, "native_id": "Mercury_SC_407070", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1411316394805908, "logits_per_token_corr": -1.1411316394805908, "logits_per_char_corr": -0.5705658197402954, "bits_per_byte_corr": 0.8231524786406618}, "model_output": [{"sum_logits": -1.203324556350708, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.203324556350708, "logits_per_char": -0.601662278175354, "bits_per_byte": 0.8680151850141629, "num_chars": 2}, {"sum_logits": -1.1411316394805908, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.1411316394805908, "logits_per_char": -0.5705658197402954, "bits_per_byte": 0.8231524786406618, "num_chars": 2}, {"sum_logits": -1.3733875751495361, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3733875751495361, "logits_per_char": -0.6866937875747681, "bits_per_byte": 0.9906897219440889, "num_chars": 2}, {"sum_logits": -2.1464264392852783, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -2.1464264392852783, "logits_per_char": -1.0732132196426392, "bits_per_byte": 1.5483193897960257, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 200, "native_id": "Mercury_SC_400708", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5170142650604248, "logits_per_token_corr": -1.5170142650604248, "logits_per_char_corr": -0.7585071325302124, "bits_per_byte_corr": 1.0942944785810313}, "model_output": [{"sum_logits": -1.321181058883667, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.321181058883667, "logits_per_char": -0.6605905294418335, "bits_per_byte": 0.9530306808846326, "num_chars": 2}, {"sum_logits": -0.996208906173706, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -0.996208906173706, "logits_per_char": -0.498104453086853, "bits_per_byte": 0.7186128243136285, "num_chars": 2}, {"sum_logits": -1.5170142650604248, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.5170142650604248, "logits_per_char": -0.7585071325302124, "bits_per_byte": 1.0942944785810313, "num_chars": 2}, {"sum_logits": -2.013563871383667, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -2.013563871383667, "logits_per_char": -1.0067819356918335, "bits_per_byte": 1.452479305880243, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 201, "native_id": "Mercury_7075040", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3914282321929932, "logits_per_token_corr": -1.3914282321929932, "logits_per_char_corr": -0.6957141160964966, "bits_per_byte_corr": 1.0037033051695852}, "model_output": [{"sum_logits": -1.2034533023834229, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.2034533023834229, "logits_per_char": -0.6017266511917114, "bits_per_byte": 0.8681080556456288, "num_chars": 2}, {"sum_logits": -1.2053892612457275, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.2053892612457275, "logits_per_char": -0.6026946306228638, "bits_per_byte": 0.8695045547706358, "num_chars": 2}, {"sum_logits": -1.3914282321929932, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3914282321929932, "logits_per_char": -0.6957141160964966, "bits_per_byte": 1.0037033051695852, "num_chars": 2}, {"sum_logits": -1.9516661167144775, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.9516661167144775, "logits_per_char": -0.9758330583572388, "bits_per_byte": 1.4078295140285102, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 202, "native_id": "Mercury_7137165", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3843512535095215, "logits_per_token_corr": -1.3843512535095215, "logits_per_char_corr": -0.6921756267547607, "bits_per_byte_corr": 0.9985983441440208}, "model_output": [{"sum_logits": -1.3843512535095215, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.3843512535095215, "logits_per_char": -0.6921756267547607, "bits_per_byte": 0.9985983441440208, "num_chars": 2}, {"sum_logits": -1.3214449882507324, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.3214449882507324, "logits_per_char": -0.6607224941253662, "bits_per_byte": 0.9532210656791379, "num_chars": 2}, {"sum_logits": -1.2892851829528809, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": true, "logits_per_token": -1.2892851829528809, "logits_per_char": -0.6446425914764404, "bits_per_byte": 0.9300226698695387, "num_chars": 2}, {"sum_logits": -1.6756796836853027, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.6756796836853027, "logits_per_char": -0.8378398418426514, "bits_per_byte": 1.208747384886455, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 203, "native_id": "Mercury_SC_400046", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0118045806884766, "logits_per_token_corr": -1.0118045806884766, "logits_per_char_corr": -0.5059022903442383, "bits_per_byte_corr": 0.7298627254545255}, "model_output": [{"sum_logits": -1.2282257080078125, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.2282257080078125, "logits_per_char": -0.6141128540039062, "bits_per_byte": 0.8859775690182401, "num_chars": 2}, {"sum_logits": -1.0118045806884766, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.0118045806884766, "logits_per_char": -0.5059022903442383, "bits_per_byte": 0.7298627254545255, "num_chars": 2}, {"sum_logits": -1.4296951293945312, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4296951293945312, "logits_per_char": -0.7148475646972656, "bits_per_byte": 1.0313070365810384, "num_chars": 2}, {"sum_logits": -2.349363327026367, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -2.349363327026367, "logits_per_char": -1.1746816635131836, "bits_per_byte": 1.6947074105748856, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 204, "native_id": "Mercury_7099330", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6713403463363647, "logits_per_token_corr": -1.6713403463363647, "logits_per_char_corr": -0.8356701731681824, "bits_per_byte_corr": 1.2056172146494242}, "model_output": [{"sum_logits": -1.2937463521957397, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.2937463521957397, "logits_per_char": -0.6468731760978699, "bits_per_byte": 0.9332407232411605, "num_chars": 2}, {"sum_logits": -1.1702169179916382, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.1702169179916382, "logits_per_char": -0.5851084589958191, "bits_per_byte": 0.844133072176058, "num_chars": 2}, {"sum_logits": -1.5505479574203491, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.5505479574203491, "logits_per_char": -0.7752739787101746, "bits_per_byte": 1.1184839244162281, "num_chars": 2}, {"sum_logits": -1.6713403463363647, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.6713403463363647, "logits_per_char": -0.8356701731681824, "bits_per_byte": 1.2056172146494242, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 205, "native_id": "MDSA_2007_5_2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2327461242675781, "logits_per_token_corr": -1.2327461242675781, "logits_per_char_corr": -0.6163730621337891, "bits_per_byte_corr": 0.8892383600786012}, "model_output": [{"sum_logits": -1.8489723205566406, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.8489723205566406, "logits_per_char": -0.9244861602783203, "bits_per_byte": 1.3337515988049704, "num_chars": 2}, {"sum_logits": -1.3430938720703125, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3430938720703125, "logits_per_char": -0.6715469360351562, "bits_per_byte": 0.9688374343427939, "num_chars": 2}, {"sum_logits": -1.2327461242675781, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.2327461242675781, "logits_per_char": -0.6163730621337891, "bits_per_byte": 0.8892383600786012, "num_chars": 2}, {"sum_logits": -1.4394092559814453, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4394092559814453, "logits_per_char": -0.7197046279907227, "bits_per_byte": 1.0383142977077977, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 206, "native_id": "Mercury_7271758", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.9570232033729553, "logits_per_token_corr": -0.9570232033729553, "logits_per_char_corr": -0.47851160168647766, "bits_per_byte_corr": 0.6903463147614123}, "model_output": [{"sum_logits": -1.4108061790466309, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4108061790466309, "logits_per_char": -0.7054030895233154, "bits_per_byte": 1.017681539083772, "num_chars": 2}, {"sum_logits": -0.9570232033729553, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -0.9570232033729553, "logits_per_char": -0.47851160168647766, "bits_per_byte": 0.6903463147614123, "num_chars": 2}, {"sum_logits": -1.4049181938171387, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4049181938171387, "logits_per_char": -0.7024590969085693, "bits_per_byte": 1.0134342555380609, "num_chars": 2}, {"sum_logits": -2.1315932273864746, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -2.1315932273864746, "logits_per_char": -1.0657966136932373, "bits_per_byte": 1.5376194891725885, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 207, "native_id": "MCAS_2003_8_31", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.415809988975525, "logits_per_token_corr": -1.415809988975525, "logits_per_char_corr": -0.7079049944877625, "bits_per_byte_corr": 1.0212910249687577}, "model_output": [{"sum_logits": -1.3585189580917358, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.3585189580917358, "logits_per_char": -0.6792594790458679, "bits_per_byte": 0.9799642818969985, "num_chars": 2}, {"sum_logits": -1.0542815923690796, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": true, "logits_per_token": -1.0542815923690796, "logits_per_char": -0.5271407961845398, "bits_per_byte": 0.7605034125062418, "num_chars": 2}, {"sum_logits": -1.415809988975525, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.415809988975525, "logits_per_char": -0.7079049944877625, "bits_per_byte": 1.0212910249687577, "num_chars": 2}, {"sum_logits": -1.975897192955017, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.975897192955017, "logits_per_char": -0.9879485964775085, "bits_per_byte": 1.4253085407923372, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 208, "native_id": "AKDE&ED_2008_8_53", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4253021478652954, "logits_per_token_corr": -1.4253021478652954, "logits_per_char_corr": -0.7126510739326477, "bits_per_byte_corr": 1.0281381702475636}, "model_output": [{"sum_logits": -1.2986904382705688, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.2986904382705688, "logits_per_char": -0.6493452191352844, "bits_per_byte": 0.9368071274721052, "num_chars": 2}, {"sum_logits": -0.974795937538147, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -0.974795937538147, "logits_per_char": -0.4873979687690735, "bits_per_byte": 0.7031666324830014, "num_chars": 2}, {"sum_logits": -1.4253021478652954, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4253021478652954, "logits_per_char": -0.7126510739326477, "bits_per_byte": 1.0281381702475636, "num_chars": 2}, {"sum_logits": -2.3058371543884277, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -2.3058371543884277, "logits_per_char": -1.1529185771942139, "bits_per_byte": 1.663309913868047, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 209, "native_id": "TIMSS_2007_8_pg109", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2723469734191895, "logits_per_token_corr": -1.2723469734191895, "logits_per_char_corr": -0.6361734867095947, "bits_per_byte_corr": 0.9178043344216326}, "model_output": [{"sum_logits": -1.3030858039855957, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3030858039855957, "logits_per_char": -0.6515429019927979, "bits_per_byte": 0.9399777136320887, "num_chars": 2}, {"sum_logits": -1.137728214263916, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.137728214263916, "logits_per_char": -0.568864107131958, "bits_per_byte": 0.8206974262995934, "num_chars": 2}, {"sum_logits": -1.2723469734191895, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.2723469734191895, "logits_per_char": -0.6361734867095947, "bits_per_byte": 0.9178043344216326, "num_chars": 2}, {"sum_logits": -2.185265064239502, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -2.185265064239502, "logits_per_char": -1.092632532119751, "bits_per_byte": 1.5763355356042483, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 210, "native_id": "Mercury_175385", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4042317867279053, "logits_per_token_corr": -1.4042317867279053, "logits_per_char_corr": -0.7021158933639526, "bits_per_byte_corr": 1.0129391174862266}, "model_output": [{"sum_logits": -1.3829286098480225, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3829286098480225, "logits_per_char": -0.6914643049240112, "bits_per_byte": 0.9975721236663218, "num_chars": 2}, {"sum_logits": -1.0430810451507568, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.0430810451507568, "logits_per_char": -0.5215405225753784, "bits_per_byte": 0.7524239255426775, "num_chars": 2}, {"sum_logits": -1.4042317867279053, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4042317867279053, "logits_per_char": -0.7021158933639526, "bits_per_byte": 1.0129391174862266, "num_chars": 2}, {"sum_logits": -1.9546430110931396, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.9546430110931396, "logits_per_char": -0.9773215055465698, "bits_per_byte": 1.409976889407185, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 211, "native_id": "Mercury_410669", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.183671474456787, "logits_per_token_corr": -1.183671474456787, "logits_per_char_corr": -0.5918357372283936, "bits_per_byte_corr": 0.8538384831208805}, "model_output": [{"sum_logits": -2.0716910362243652, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -2.0716910362243652, "logits_per_char": -1.0358455181121826, "bits_per_byte": 1.4944091921085785, "num_chars": 2}, {"sum_logits": -1.4582419395446777, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.4582419395446777, "logits_per_char": -0.7291209697723389, "bits_per_byte": 1.0518992072994608, "num_chars": 2}, {"sum_logits": -1.2290854454040527, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.2290854454040527, "logits_per_char": -0.6145427227020264, "bits_per_byte": 0.8865977384572518, "num_chars": 2}, {"sum_logits": -1.183671474456787, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": true, "logits_per_token": -1.183671474456787, "logits_per_char": -0.5918357372283936, "bits_per_byte": 0.8538384831208805, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 212, "native_id": "MEAP_2005_8_39", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4585903882980347, "logits_per_token_corr": -1.4585903882980347, "logits_per_char_corr": -0.7292951941490173, "bits_per_byte_corr": 1.0521505599436969}, "model_output": [{"sum_logits": -1.287300944328308, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.287300944328308, "logits_per_char": -0.643650472164154, "bits_per_byte": 0.9285913442577319, "num_chars": 2}, {"sum_logits": -1.1537541151046753, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.1537541151046753, "logits_per_char": -0.5768770575523376, "bits_per_byte": 0.8322576701339726, "num_chars": 2}, {"sum_logits": -1.4585903882980347, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4585903882980347, "logits_per_char": -0.7292951941490173, "bits_per_byte": 1.0521505599436969, "num_chars": 2}, {"sum_logits": -1.8349560499191284, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.8349560499191284, "logits_per_char": -0.9174780249595642, "bits_per_byte": 1.323640996734715, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 213, "native_id": "Mercury_SC_408568", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2146912813186646, "logits_per_token_corr": -1.2146912813186646, "logits_per_char_corr": -0.6073456406593323, "bits_per_byte_corr": 0.8762145438853786}, "model_output": [{"sum_logits": -1.3431397676467896, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.3431397676467896, "logits_per_char": -0.6715698838233948, "bits_per_byte": 0.968870541003085, "num_chars": 2}, {"sum_logits": -1.2146912813186646, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -1.2146912813186646, "logits_per_char": -0.6073456406593323, "bits_per_byte": 0.8762145438853786, "num_chars": 2}, {"sum_logits": -1.2726613283157349, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.2726613283157349, "logits_per_char": -0.6363306641578674, "bits_per_byte": 0.9180310935467954, "num_chars": 2}, {"sum_logits": -1.9320698976516724, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.9320698976516724, "logits_per_char": -0.9660349488258362, "bits_per_byte": 1.3936938299974586, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 214, "native_id": "AKDE&ED_2008_8_7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.005072832107544, "logits_per_token_corr": -1.005072832107544, "logits_per_char_corr": -0.502536416053772, "bits_per_byte_corr": 0.7250067953074105}, "model_output": [{"sum_logits": -1.4017536640167236, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.4017536640167236, "logits_per_char": -0.7008768320083618, "bits_per_byte": 1.011151529813157, "num_chars": 2}, {"sum_logits": -1.005072832107544, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": true, "logits_per_token": -1.005072832107544, "logits_per_char": -0.502536416053772, "bits_per_byte": 0.7250067953074105, "num_chars": 2}, {"sum_logits": -1.3775112628936768, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.3775112628936768, "logits_per_char": -0.6887556314468384, "bits_per_byte": 0.9936643338734142, "num_chars": 2}, {"sum_logits": -2.073840379714966, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -2.073840379714966, "logits_per_char": -1.036920189857483, "bits_per_byte": 1.4959596157061077, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 215, "native_id": "Mercury_7082845", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3234500885009766, "logits_per_token_corr": -1.3234500885009766, "logits_per_char_corr": -0.6617250442504883, "bits_per_byte_corr": 0.9546674397728951}, "model_output": [{"sum_logits": -1.1996402740478516, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.1996402740478516, "logits_per_char": -0.5998201370239258, "bits_per_byte": 0.865357537110378, "num_chars": 2}, {"sum_logits": -1.3234500885009766, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3234500885009766, "logits_per_char": -0.6617250442504883, "bits_per_byte": 0.9546674397728951, "num_chars": 2}, {"sum_logits": -1.421518325805664, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.421518325805664, "logits_per_char": -0.710759162902832, "bits_per_byte": 1.0254087195870434, "num_chars": 2}, {"sum_logits": -1.7424812316894531, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.7424812316894531, "logits_per_char": -0.8712406158447266, "bits_per_byte": 1.2569345159011365, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 216, "native_id": "Mercury_SC_405726", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5973073244094849, "logits_per_token_corr": -1.5973073244094849, "logits_per_char_corr": -0.7986536622047424, "bits_per_byte_corr": 1.1522136778514191}, "model_output": [{"sum_logits": -1.4575673341751099, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4575673341751099, "logits_per_char": -0.7287836670875549, "bits_per_byte": 1.051412582388844, "num_chars": 2}, {"sum_logits": -1.2789822816848755, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.2789822816848755, "logits_per_char": -0.6394911408424377, "bits_per_byte": 0.9225906975864734, "num_chars": 2}, {"sum_logits": -1.2962228059768677, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.2962228059768677, "logits_per_char": -0.6481114029884338, "bits_per_byte": 0.9350271070356737, "num_chars": 2}, {"sum_logits": -1.5973073244094849, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.5973073244094849, "logits_per_char": -0.7986536622047424, "bits_per_byte": 1.1522136778514191, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 217, "native_id": "Mercury_SC_415407", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3193151950836182, "logits_per_token_corr": -1.3193151950836182, "logits_per_char_corr": -0.6596575975418091, "bits_per_byte_corr": 0.9516847446589793}, "model_output": [{"sum_logits": -1.3193151950836182, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3193151950836182, "logits_per_char": -0.6596575975418091, "bits_per_byte": 0.9516847446589793, "num_chars": 2}, {"sum_logits": -1.0093348026275635, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.0093348026275635, "logits_per_char": -0.5046674013137817, "bits_per_byte": 0.7280811571742364, "num_chars": 2}, {"sum_logits": -1.4133875370025635, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4133875370025635, "logits_per_char": -0.7066937685012817, "bits_per_byte": 1.0195435952446648, "num_chars": 2}, {"sum_logits": -2.1701338291168213, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -2.1701338291168213, "logits_per_char": -1.0850669145584106, "bits_per_byte": 1.5654206566672322, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 218, "native_id": "Mercury_SC_401792", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3920478820800781, "logits_per_token_corr": -1.3920478820800781, "logits_per_char_corr": -0.6960239410400391, "bits_per_byte_corr": 1.004150288079178}, "model_output": [{"sum_logits": -1.333322525024414, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.333322525024414, "logits_per_char": -0.666661262512207, "bits_per_byte": 0.9617888973798274, "num_chars": 2}, {"sum_logits": -1.186859130859375, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": true, "logits_per_token": -1.186859130859375, "logits_per_char": -0.5934295654296875, "bits_per_byte": 0.8561378911629178, "num_chars": 2}, {"sum_logits": -1.3920478820800781, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.3920478820800781, "logits_per_char": -0.6960239410400391, "bits_per_byte": 1.004150288079178, "num_chars": 2}, {"sum_logits": -1.8038806915283203, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.8038806915283203, "logits_per_char": -0.9019403457641602, "bits_per_byte": 1.3012248640125657, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 219, "native_id": "LEAP_2000_8_4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1455382108688354, "logits_per_token_corr": -1.1455382108688354, "logits_per_char_corr": -0.5727691054344177, "bits_per_byte_corr": 0.826331147985236}, "model_output": [{"sum_logits": -1.3620909452438354, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.3620909452438354, "logits_per_char": -0.6810454726219177, "bits_per_byte": 0.9825409259722269, "num_chars": 2}, {"sum_logits": -1.1455382108688354, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.1455382108688354, "logits_per_char": -0.5727691054344177, "bits_per_byte": 0.826331147985236, "num_chars": 2}, {"sum_logits": -1.4109247922897339, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4109247922897339, "logits_per_char": -0.7054623961448669, "bits_per_byte": 1.0177671004525761, "num_chars": 2}, {"sum_logits": -1.777683138847351, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.777683138847351, "logits_per_char": -0.8888415694236755, "bits_per_byte": 1.2823273243444213, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 220, "native_id": "Mercury_SC_413439", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.319904088973999, "logits_per_token_corr": -1.319904088973999, "logits_per_char_corr": -0.6599520444869995, "bits_per_byte_corr": 0.9521095418066108}, "model_output": [{"sum_logits": -1.319904088973999, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.319904088973999, "logits_per_char": -0.6599520444869995, "bits_per_byte": 0.9521095418066108, "num_chars": 2}, {"sum_logits": -1.2949674129486084, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.2949674129486084, "logits_per_char": -0.6474837064743042, "bits_per_byte": 0.934121532387555, "num_chars": 2}, {"sum_logits": -1.3112828731536865, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3112828731536865, "logits_per_char": -0.6556414365768433, "bits_per_byte": 0.9458906491514072, "num_chars": 2}, {"sum_logits": -1.7209928035736084, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.7209928035736084, "logits_per_char": -0.8604964017868042, "bits_per_byte": 1.2414338915615113, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 221, "native_id": "ACTAAP_2014_7_13", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.9499959945678711, "logits_per_token_corr": -0.9499959945678711, "logits_per_char_corr": -0.47499799728393555, "bits_per_byte_corr": 0.6852772551142154}, "model_output": [{"sum_logits": -1.4503793716430664, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.4503793716430664, "logits_per_char": -0.7251896858215332, "bits_per_byte": 1.046227563439303, "num_chars": 2}, {"sum_logits": -0.9499959945678711, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -0.9499959945678711, "logits_per_char": -0.47499799728393555, "bits_per_byte": 0.6852772551142154, "num_chars": 2}, {"sum_logits": -1.380763053894043, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.380763053894043, "logits_per_char": -0.6903815269470215, "bits_per_byte": 0.9960100052485337, "num_chars": 2}, {"sum_logits": -2.1370344161987305, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -2.1370344161987305, "logits_per_char": -1.0685172080993652, "bits_per_byte": 1.5415444772305822, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 222, "native_id": "Mercury_SC_402638", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4551005363464355, "logits_per_token_corr": -1.4551005363464355, "logits_per_char_corr": -0.7275502681732178, "bits_per_byte_corr": 1.0496331638916907}, "model_output": [{"sum_logits": -1.4551005363464355, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4551005363464355, "logits_per_char": -0.7275502681732178, "bits_per_byte": 1.0496331638916907, "num_chars": 2}, {"sum_logits": -1.1545042991638184, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.1545042991638184, "logits_per_char": -0.5772521495819092, "bits_per_byte": 0.8327988135449128, "num_chars": 2}, {"sum_logits": -1.2846732139587402, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.2846732139587402, "logits_per_char": -0.6423366069793701, "bits_per_byte": 0.9266958374712462, "num_chars": 2}, {"sum_logits": -1.899055004119873, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.899055004119873, "logits_per_char": -0.9495275020599365, "bits_per_byte": 1.3698786184105396, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 223, "native_id": "Mercury_SC_406725", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2652673721313477, "logits_per_token_corr": -1.2652673721313477, "logits_per_char_corr": -0.6326336860656738, "bits_per_byte_corr": 0.9126974815869087}, "model_output": [{"sum_logits": -1.2652673721313477, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.2652673721313477, "logits_per_char": -0.6326336860656738, "bits_per_byte": 0.9126974815869087, "num_chars": 2}, {"sum_logits": -1.089695930480957, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.089695930480957, "logits_per_char": -0.5448479652404785, "bits_per_byte": 0.7860494574914454, "num_chars": 2}, {"sum_logits": -1.4139127731323242, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4139127731323242, "logits_per_char": -0.7069563865661621, "bits_per_byte": 1.0199224730245158, "num_chars": 2}, {"sum_logits": -2.087557792663574, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -2.087557792663574, "logits_per_char": -1.043778896331787, "bits_per_byte": 1.5058546375235065, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 224, "native_id": "NYSEDREGENTS_2015_4_29", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2967078685760498, "logits_per_token_corr": -1.2967078685760498, "logits_per_char_corr": -0.6483539342880249, "bits_per_byte_corr": 0.9353770057388544}, "model_output": [{"sum_logits": -1.2967078685760498, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.2967078685760498, "logits_per_char": -0.6483539342880249, "bits_per_byte": 0.9353770057388544, "num_chars": 2}, {"sum_logits": -1.3191020488739014, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3191020488739014, "logits_per_char": -0.6595510244369507, "bits_per_byte": 0.9515309921691079, "num_chars": 2}, {"sum_logits": -1.430565595626831, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.430565595626831, "logits_per_char": -0.7152827978134155, "bits_per_byte": 1.031934945239339, "num_chars": 2}, {"sum_logits": -1.5934550762176514, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.5934550762176514, "logits_per_char": -0.7967275381088257, "bits_per_byte": 1.1494348681701012, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 225, "native_id": "Mercury_406136", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2745243310928345, "logits_per_token_corr": -1.2745243310928345, "logits_per_char_corr": -0.6372621655464172, "bits_per_byte_corr": 0.9193749659806383}, "model_output": [{"sum_logits": -1.2745243310928345, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.2745243310928345, "logits_per_char": -0.6372621655464172, "bits_per_byte": 0.9193749659806383, "num_chars": 2}, {"sum_logits": -1.1748806238174438, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.1748806238174438, "logits_per_char": -0.5874403119087219, "bits_per_byte": 0.8474972248095879, "num_chars": 2}, {"sum_logits": -1.3209720849990845, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.3209720849990845, "logits_per_char": -0.6604860424995422, "bits_per_byte": 0.9528799380911513, "num_chars": 2}, {"sum_logits": -2.0262980461120605, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -2.0262980461120605, "logits_per_char": -1.0131490230560303, "bits_per_byte": 1.461665071245483, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 226, "native_id": "MSA_2012_5_23", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4065654277801514, "logits_per_token_corr": -1.4065654277801514, "logits_per_char_corr": -0.7032827138900757, "bits_per_byte_corr": 1.0146224836728728}, "model_output": [{"sum_logits": -1.4316184520721436, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.4316184520721436, "logits_per_char": -0.7158092260360718, "bits_per_byte": 1.0326944206255497, "num_chars": 2}, {"sum_logits": -0.9988430142402649, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": true, "logits_per_token": -0.9988430142402649, "logits_per_char": -0.49942150712013245, "bits_per_byte": 0.7205129316360248, "num_chars": 2}, {"sum_logits": -1.4065654277801514, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.4065654277801514, "logits_per_char": -0.7032827138900757, "bits_per_byte": 1.0146224836728728, "num_chars": 2}, {"sum_logits": -1.9971072673797607, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.9971072673797607, "logits_per_char": -0.9985536336898804, "bits_per_byte": 1.44060837538708, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 227, "native_id": "Mercury_405873", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0150272846221924, "logits_per_token_corr": -1.0150272846221924, "logits_per_char_corr": -0.5075136423110962, "bits_per_byte_corr": 0.7321874149462397}, "model_output": [{"sum_logits": -1.150275468826294, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.150275468826294, "logits_per_char": -0.575137734413147, "bits_per_byte": 0.8297483572665569, "num_chars": 2}, {"sum_logits": -1.0150272846221924, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.0150272846221924, "logits_per_char": -0.5075136423110962, "bits_per_byte": 0.7321874149462397, "num_chars": 2}, {"sum_logits": -1.5595810413360596, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.5595810413360596, "logits_per_char": -0.7797905206680298, "bits_per_byte": 1.1249999171007976, "num_chars": 2}, {"sum_logits": -2.2685062885284424, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -2.2685062885284424, "logits_per_char": -1.1342531442642212, "bits_per_byte": 1.6363813863438816, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 228, "native_id": "Mercury_7043820", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2689008712768555, "logits_per_token_corr": -1.2689008712768555, "logits_per_char_corr": -0.6344504356384277, "bits_per_byte_corr": 0.9153184971860597}, "model_output": [{"sum_logits": -1.2906694412231445, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.2906694412231445, "logits_per_char": -0.6453347206115723, "bits_per_byte": 0.931021201140449, "num_chars": 2}, {"sum_logits": -1.2689008712768555, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.2689008712768555, "logits_per_char": -0.6344504356384277, "bits_per_byte": 0.9153184971860597, "num_chars": 2}, {"sum_logits": -1.5741243362426758, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.5741243362426758, "logits_per_char": -0.7870621681213379, "bits_per_byte": 1.1354906868207855, "num_chars": 2}, {"sum_logits": -1.5750722885131836, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.5750722885131836, "logits_per_char": -0.7875361442565918, "bits_per_byte": 1.1361744898406165, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 229, "native_id": "MCAS_2005_5_34", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.232871174812317, "logits_per_token_corr": -1.232871174812317, "logits_per_char_corr": -0.6164355874061584, "bits_per_byte_corr": 0.8893285649789788}, "model_output": [{"sum_logits": -1.1435290575027466, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.1435290575027466, "logits_per_char": -0.5717645287513733, "bits_per_byte": 0.824881850186414, "num_chars": 2}, {"sum_logits": -1.232871174812317, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.232871174812317, "logits_per_char": -0.6164355874061584, "bits_per_byte": 0.8893285649789788, "num_chars": 2}, {"sum_logits": -1.388722538948059, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.388722538948059, "logits_per_char": -0.6943612694740295, "bits_per_byte": 1.0017515600562672, "num_chars": 2}, {"sum_logits": -2.0350770950317383, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -2.0350770950317383, "logits_per_char": -1.0175385475158691, "bits_per_byte": 1.4679978164155578, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 230, "native_id": "Mercury_7182245", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6665782928466797, "logits_per_token_corr": -1.6665782928466797, "logits_per_char_corr": -0.8332891464233398, "bits_per_byte_corr": 1.2021821191724134}, "model_output": [{"sum_logits": -1.6047420501708984, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": false, "logits_per_token": -1.6047420501708984, "logits_per_char": -0.8023710250854492, "bits_per_byte": 1.1575766988446032, "num_chars": 2}, {"sum_logits": -1.1534538269042969, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": true, "logits_per_token": -1.1534538269042969, "logits_per_char": -0.5767269134521484, "bits_per_byte": 0.8320410579852107, "num_chars": 2}, {"sum_logits": -1.2709789276123047, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": false, "logits_per_token": -1.2709789276123047, "logits_per_char": -0.6354894638061523, "bits_per_byte": 0.9168174979709811, "num_chars": 2}, {"sum_logits": -1.6665782928466797, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": false, "logits_per_token": -1.6665782928466797, "logits_per_char": -0.8332891464233398, "bits_per_byte": 1.2021821191724134, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 231, "native_id": "MSA_2012_8_30", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.611575722694397, "logits_per_token_corr": -1.611575722694397, "logits_per_char_corr": -0.8057878613471985, "bits_per_byte_corr": 1.1625061515749622}, "model_output": [{"sum_logits": -1.611575722694397, "num_tokens": 1, "num_tokens_all": 498, "is_greedy": false, "logits_per_token": -1.611575722694397, "logits_per_char": -0.8057878613471985, "bits_per_byte": 1.1625061515749622, "num_chars": 2}, {"sum_logits": -1.0862575769424438, "num_tokens": 1, "num_tokens_all": 498, "is_greedy": true, "logits_per_token": -1.0862575769424438, "logits_per_char": -0.5431287884712219, "bits_per_byte": 0.7835692096920256, "num_chars": 2}, {"sum_logits": -1.2973953485488892, "num_tokens": 1, "num_tokens_all": 498, "is_greedy": false, "logits_per_token": -1.2973953485488892, "logits_per_char": -0.6486976742744446, "bits_per_byte": 0.9358729177126176, "num_chars": 2}, {"sum_logits": -1.7805076837539673, "num_tokens": 1, "num_tokens_all": 498, "is_greedy": false, "logits_per_token": -1.7805076837539673, "logits_per_char": -0.8902538418769836, "bits_per_byte": 1.2843648028091945, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 232, "native_id": "Mercury_7252753", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2980657815933228, "logits_per_token_corr": -1.2980657815933228, "logits_per_char_corr": -0.6490328907966614, "bits_per_byte_corr": 0.9363565329268442}, "model_output": [{"sum_logits": -1.2980657815933228, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.2980657815933228, "logits_per_char": -0.6490328907966614, "bits_per_byte": 0.9363565329268442, "num_chars": 2}, {"sum_logits": -1.1997476816177368, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.1997476816177368, "logits_per_char": -0.5998738408088684, "bits_per_byte": 0.8654350152945917, "num_chars": 2}, {"sum_logits": -1.64230215549469, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.64230215549469, "logits_per_char": -0.821151077747345, "bits_per_byte": 1.1846705876875734, "num_chars": 2}, {"sum_logits": -1.527077317237854, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.527077317237854, "logits_per_char": -0.763538658618927, "bits_per_byte": 1.1015534363173285, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 233, "native_id": "TAKS_2009_8_36", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.639944076538086, "logits_per_token_corr": -1.639944076538086, "logits_per_char_corr": -0.819972038269043, "bits_per_byte_corr": 1.1829695932792135}, "model_output": [{"sum_logits": -1.5947856903076172, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.5947856903076172, "logits_per_char": -0.7973928451538086, "bits_per_byte": 1.1503947033445672, "num_chars": 2}, {"sum_logits": -1.639944076538086, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.639944076538086, "logits_per_char": -0.819972038269043, "bits_per_byte": 1.1829695932792135, "num_chars": 2}, {"sum_logits": -1.2635974884033203, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.2635974884033203, "logits_per_char": -0.6317987442016602, "bits_per_byte": 0.9114929151002646, "num_chars": 2}, {"sum_logits": -1.7429237365722656, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.7429237365722656, "logits_per_char": -0.8714618682861328, "bits_per_byte": 1.257253715701138, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 234, "native_id": "Mercury_SC_415473", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3583613634109497, "logits_per_token_corr": -1.3583613634109497, "logits_per_char_corr": -0.6791806817054749, "bits_per_byte_corr": 0.9798506013647781}, "model_output": [{"sum_logits": -1.4593764543533325, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4593764543533325, "logits_per_char": -0.7296882271766663, "bits_per_byte": 1.052717586743592, "num_chars": 2}, {"sum_logits": -1.5275641679763794, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.5275641679763794, "logits_per_char": -0.7637820839881897, "bits_per_byte": 1.1019046248903908, "num_chars": 2}, {"sum_logits": -1.3583613634109497, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.3583613634109497, "logits_per_char": -0.6791806817054749, "bits_per_byte": 0.9798506013647781, "num_chars": 2}, {"sum_logits": -1.6830397844314575, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.6830397844314575, "logits_per_char": -0.8415198922157288, "bits_per_byte": 1.214056575309919, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 235, "native_id": "Mercury_SC_413624", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2349746227264404, "logits_per_token_corr": -1.2349746227264404, "logits_per_char_corr": -0.6174873113632202, "bits_per_byte_corr": 0.890845881916217}, "model_output": [{"sum_logits": -1.2856738567352295, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.2856738567352295, "logits_per_char": -0.6428369283676147, "bits_per_byte": 0.9274176486569179, "num_chars": 2}, {"sum_logits": -1.2349746227264404, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.2349746227264404, "logits_per_char": -0.6174873113632202, "bits_per_byte": 0.890845881916217, "num_chars": 2}, {"sum_logits": -1.3906276226043701, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3906276226043701, "logits_per_char": -0.6953138113021851, "bits_per_byte": 1.0031257874279875, "num_chars": 2}, {"sum_logits": -1.7603042125701904, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.7603042125701904, "logits_per_char": -0.8801521062850952, "bits_per_byte": 1.2697910789663949, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 236, "native_id": "Mercury_7016800", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9234716892242432, "logits_per_token_corr": -1.9234716892242432, "logits_per_char_corr": -0.9617358446121216, "bits_per_byte_corr": 1.3874915336680633}, "model_output": [{"sum_logits": -1.1931631565093994, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.1931631565093994, "logits_per_char": -0.5965815782546997, "bits_per_byte": 0.8606852844343846, "num_chars": 2}, {"sum_logits": -1.1613447666168213, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.1613447666168213, "logits_per_char": -0.5806723833084106, "bits_per_byte": 0.8377331677808212, "num_chars": 2}, {"sum_logits": -1.5126802921295166, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.5126802921295166, "logits_per_char": -0.7563401460647583, "bits_per_byte": 1.091168177953645, "num_chars": 2}, {"sum_logits": -1.9234716892242432, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.9234716892242432, "logits_per_char": -0.9617358446121216, "bits_per_byte": 1.3874915336680633, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 237, "native_id": "Mercury_SC_407228", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7234206199645996, "logits_per_token_corr": -1.7234206199645996, "logits_per_char_corr": -0.8617103099822998, "bits_per_byte_corr": 1.2431851908952485}, "model_output": [{"sum_logits": -1.3524565696716309, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.3524565696716309, "logits_per_char": -0.6762282848358154, "bits_per_byte": 0.9755911930421812, "num_chars": 2}, {"sum_logits": -1.231412410736084, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.231412410736084, "logits_per_char": -0.615706205368042, "bits_per_byte": 0.888276289129674, "num_chars": 2}, {"sum_logits": -1.3434653282165527, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.3434653282165527, "logits_per_char": -0.6717326641082764, "bits_per_byte": 0.9691053833128384, "num_chars": 2}, {"sum_logits": -1.7234206199645996, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.7234206199645996, "logits_per_char": -0.8617103099822998, "bits_per_byte": 1.2431851908952485, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 238, "native_id": "Mercury_414504", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.9515537023544312, "logits_per_token_corr": -0.9515537023544312, "logits_per_char_corr": -0.4757768511772156, "bits_per_byte_corr": 0.6864009037636284}, "model_output": [{"sum_logits": -1.1975520849227905, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.1975520849227905, "logits_per_char": -0.5987760424613953, "bits_per_byte": 0.8638512270627949, "num_chars": 2}, {"sum_logits": -0.9515537023544312, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": true, "logits_per_token": -0.9515537023544312, "logits_per_char": -0.4757768511772156, "bits_per_byte": 0.6864009037636284, "num_chars": 2}, {"sum_logits": -1.6882938146591187, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.6882938146591187, "logits_per_char": -0.8441469073295593, "bits_per_byte": 1.2178465569869856, "num_chars": 2}, {"sum_logits": -2.162548542022705, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -2.162548542022705, "logits_per_char": -1.0812742710113525, "bits_per_byte": 1.5599490286300282, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 239, "native_id": "TIMSS_2011_4_pg27", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6142058372497559, "logits_per_token_corr": -1.6142058372497559, "logits_per_char_corr": -0.8071029186248779, "bits_per_byte_corr": 1.1644033781879566}, "model_output": [{"sum_logits": -1.6142058372497559, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.6142058372497559, "logits_per_char": -0.8071029186248779, "bits_per_byte": 1.1644033781879566, "num_chars": 2}, {"sum_logits": -1.5141310691833496, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.5141310691833496, "logits_per_char": -0.7570655345916748, "bits_per_byte": 1.0922146923841458, "num_chars": 2}, {"sum_logits": -1.2625041007995605, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.2625041007995605, "logits_per_char": -0.6312520503997803, "bits_per_byte": 0.9107042026634072, "num_chars": 2}, {"sum_logits": -1.2577166557312012, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.2577166557312012, "logits_per_char": -0.6288583278656006, "bits_per_byte": 0.9072507910340796, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 240, "native_id": "Mercury_SC_402029", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.0124826431274414, "logits_per_token_corr": -2.0124826431274414, "logits_per_char_corr": -1.0062413215637207, "bits_per_byte_corr": 1.4516993645585796}, "model_output": [{"sum_logits": -1.16234290599823, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.16234290599823, "logits_per_char": -0.581171452999115, "bits_per_byte": 0.8384531731486589, "num_chars": 2}, {"sum_logits": -1.229866862297058, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.229866862297058, "logits_per_char": -0.614933431148529, "bits_per_byte": 0.887161411595455, "num_chars": 2}, {"sum_logits": -1.3860424757003784, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3860424757003784, "logits_per_char": -0.6930212378501892, "bits_per_byte": 0.9998183030779171, "num_chars": 2}, {"sum_logits": -2.0124826431274414, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -2.0124826431274414, "logits_per_char": -1.0062413215637207, "bits_per_byte": 1.4516993645585796, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 241, "native_id": "Mercury_7131845", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2800226211547852, "logits_per_token_corr": -1.2800226211547852, "logits_per_char_corr": -0.6400113105773926, "bits_per_byte_corr": 0.9233411438835138}, "model_output": [{"sum_logits": -1.6904096603393555, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.6904096603393555, "logits_per_char": -0.8452048301696777, "bits_per_byte": 1.2193728170220686, "num_chars": 2}, {"sum_logits": -1.0633306503295898, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.0633306503295898, "logits_per_char": -0.5316653251647949, "bits_per_byte": 0.7670309280284189, "num_chars": 2}, {"sum_logits": -1.2800226211547852, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.2800226211547852, "logits_per_char": -0.6400113105773926, "bits_per_byte": 0.9233411438835138, "num_chars": 2}, {"sum_logits": -1.7140378952026367, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.7140378952026367, "logits_per_char": -0.8570189476013184, "bits_per_byte": 1.2364169856531886, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 242, "native_id": "Mercury_SC_405533", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1600972414016724, "logits_per_token_corr": -1.1600972414016724, "logits_per_char_corr": -0.5800486207008362, "bits_per_byte_corr": 0.8368332685601809}, "model_output": [{"sum_logits": -1.4276715517044067, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4276715517044067, "logits_per_char": -0.7138357758522034, "bits_per_byte": 1.0298473338318392, "num_chars": 2}, {"sum_logits": -1.1600972414016724, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.1600972414016724, "logits_per_char": -0.5800486207008362, "bits_per_byte": 0.8368332685601809, "num_chars": 2}, {"sum_logits": -1.3052407503128052, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3052407503128052, "logits_per_char": -0.6526203751564026, "bits_per_byte": 0.9415321788219133, "num_chars": 2}, {"sum_logits": -1.8242722749710083, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.8242722749710083, "logits_per_char": -0.9121361374855042, "bits_per_byte": 1.315934282166896, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 243, "native_id": "Mercury_7086748", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3650339841842651, "logits_per_token_corr": -1.3650339841842651, "logits_per_char_corr": -0.6825169920921326, "bits_per_byte_corr": 0.9846638798144789}, "model_output": [{"sum_logits": -1.3650339841842651, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3650339841842651, "logits_per_char": -0.6825169920921326, "bits_per_byte": 0.9846638798144789, "num_chars": 2}, {"sum_logits": -1.3360689878463745, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3360689878463745, "logits_per_char": -0.6680344939231873, "bits_per_byte": 0.963770051526443, "num_chars": 2}, {"sum_logits": -1.3344038724899292, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.3344038724899292, "logits_per_char": -0.6672019362449646, "bits_per_byte": 0.9625689246928163, "num_chars": 2}, {"sum_logits": -1.6024760007858276, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.6024760007858276, "logits_per_char": -0.8012380003929138, "bits_per_byte": 1.1559420897394765, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 244, "native_id": "MDSA_2007_8_17", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0634353160858154, "logits_per_token_corr": -1.0634353160858154, "logits_per_char_corr": -0.5317176580429077, "bits_per_byte_corr": 0.7671064284121477}, "model_output": [{"sum_logits": -1.3782241344451904, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.3782241344451904, "logits_per_char": -0.6891120672225952, "bits_per_byte": 0.9941785619994944, "num_chars": 2}, {"sum_logits": -1.0634353160858154, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -1.0634353160858154, "logits_per_char": -0.5317176580429077, "bits_per_byte": 0.7671064284121477, "num_chars": 2}, {"sum_logits": -1.3008677959442139, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.3008677959442139, "logits_per_char": -0.6504338979721069, "bits_per_byte": 0.9383777590311109, "num_chars": 2}, {"sum_logits": -2.1188552379608154, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -2.1188552379608154, "logits_per_char": -1.0594276189804077, "bits_per_byte": 1.5284309720849345, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 245, "native_id": "Mercury_7210473", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.119519591331482, "logits_per_token_corr": -1.119519591331482, "logits_per_char_corr": -0.559759795665741, "bits_per_byte_corr": 0.8075626812965642}, "model_output": [{"sum_logits": -1.5387433767318726, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.5387433767318726, "logits_per_char": -0.7693716883659363, "bits_per_byte": 1.1099687194067027, "num_chars": 2}, {"sum_logits": -1.155743956565857, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.155743956565857, "logits_per_char": -0.5778719782829285, "bits_per_byte": 0.8336930373380746, "num_chars": 2}, {"sum_logits": -1.119519591331482, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.119519591331482, "logits_per_char": -0.559759795665741, "bits_per_byte": 0.8075626812965642, "num_chars": 2}, {"sum_logits": -2.0483508110046387, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -2.0483508110046387, "logits_per_char": -1.0241754055023193, "bits_per_byte": 1.4775727785197008, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 246, "native_id": "Mercury_7214340", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3492515087127686, "logits_per_token_corr": -1.3492515087127686, "logits_per_char_corr": -0.6746257543563843, "bits_per_byte_corr": 0.9732792302666309}, "model_output": [{"sum_logits": -1.3492515087127686, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3492515087127686, "logits_per_char": -0.6746257543563843, "bits_per_byte": 0.9732792302666309, "num_chars": 2}, {"sum_logits": -1.2546679973602295, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2546679973602295, "logits_per_char": -0.6273339986801147, "bits_per_byte": 0.9050516488774952, "num_chars": 2}, {"sum_logits": -1.282597303390503, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.282597303390503, "logits_per_char": -0.6412986516952515, "bits_per_byte": 0.9251983845301827, "num_chars": 2}, {"sum_logits": -1.8273742198944092, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.8273742198944092, "logits_per_char": -0.9136871099472046, "bits_per_byte": 1.3181718624459482, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 247, "native_id": "MCAS_2005_9_17", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.612645149230957, "logits_per_token_corr": -1.612645149230957, "logits_per_char_corr": -0.8063225746154785, "bits_per_byte_corr": 1.1632775797554078}, "model_output": [{"sum_logits": -1.2899713516235352, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.2899713516235352, "logits_per_char": -0.6449856758117676, "bits_per_byte": 0.9305176359387223, "num_chars": 2}, {"sum_logits": -1.1950349807739258, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.1950349807739258, "logits_per_char": -0.5975174903869629, "bits_per_byte": 0.8620355202263096, "num_chars": 2}, {"sum_logits": -1.612645149230957, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.612645149230957, "logits_per_char": -0.8063225746154785, "bits_per_byte": 1.1632775797554078, "num_chars": 2}, {"sum_logits": -1.7083826065063477, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.7083826065063477, "logits_per_char": -0.8541913032531738, "bits_per_byte": 1.23233755717472, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 248, "native_id": "MEA_2016_8_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2002997398376465, "logits_per_token_corr": -1.2002997398376465, "logits_per_char_corr": -0.6001498699188232, "bits_per_byte_corr": 0.8658332411226648}, "model_output": [{"sum_logits": -1.4853720664978027, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4853720664978027, "logits_per_char": -0.7426860332489014, "bits_per_byte": 1.0714694571064556, "num_chars": 2}, {"sum_logits": -1.2002997398376465, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.2002997398376465, "logits_per_char": -0.6001498699188232, "bits_per_byte": 0.8658332411226648, "num_chars": 2}, {"sum_logits": -1.3674731254577637, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3674731254577637, "logits_per_char": -0.6837365627288818, "bits_per_byte": 0.9864233483241321, "num_chars": 2}, {"sum_logits": -1.627793788909912, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.627793788909912, "logits_per_char": -0.813896894454956, "bits_per_byte": 1.1742050134259367, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 249, "native_id": "Mercury_SC_401278", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2754055261611938, "logits_per_token_corr": -1.2754055261611938, "logits_per_char_corr": -0.6377027630805969, "bits_per_byte_corr": 0.9200106138582277}, "model_output": [{"sum_logits": -1.27739679813385, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.27739679813385, "logits_per_char": -0.638698399066925, "bits_per_byte": 0.921447012958235, "num_chars": 2}, {"sum_logits": -1.2754055261611938, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.2754055261611938, "logits_per_char": -0.6377027630805969, "bits_per_byte": 0.9200106138582277, "num_chars": 2}, {"sum_logits": -1.3439842462539673, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3439842462539673, "logits_per_char": -0.6719921231269836, "bits_per_byte": 0.9694797035524415, "num_chars": 2}, {"sum_logits": -1.8555275201797485, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.8555275201797485, "logits_per_char": -0.9277637600898743, "bits_per_byte": 1.3384801757991212, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 250, "native_id": "Mercury_SC_407689", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.624437928199768, "logits_per_token_corr": -1.624437928199768, "logits_per_char_corr": -0.812218964099884, "bits_per_byte_corr": 1.1717842716237155}, "model_output": [{"sum_logits": -1.5088926553726196, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.5088926553726196, "logits_per_char": -0.7544463276863098, "bits_per_byte": 1.088435975570711, "num_chars": 2}, {"sum_logits": -1.2373987436294556, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2373987436294556, "logits_per_char": -0.6186993718147278, "bits_per_byte": 0.8925945155188659, "num_chars": 2}, {"sum_logits": -1.2901064157485962, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.2901064157485962, "logits_per_char": -0.6450532078742981, "bits_per_byte": 0.930615064110436, "num_chars": 2}, {"sum_logits": -1.624437928199768, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.624437928199768, "logits_per_char": -0.812218964099884, "bits_per_byte": 1.1717842716237155, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 251, "native_id": "Mercury_7230405", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1489334106445312, "logits_per_token_corr": -1.1489334106445312, "logits_per_char_corr": -0.5744667053222656, "bits_per_byte_corr": 0.8287802669248495}, "model_output": [{"sum_logits": -1.6246471405029297, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.6246471405029297, "logits_per_char": -0.8123235702514648, "bits_per_byte": 1.1719351863998477, "num_chars": 2}, {"sum_logits": -1.1489334106445312, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.1489334106445312, "logits_per_char": -0.5744667053222656, "bits_per_byte": 0.8287802669248495, "num_chars": 2}, {"sum_logits": -1.213827133178711, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.213827133178711, "logits_per_char": -0.6069135665893555, "bits_per_byte": 0.8755911927673259, "num_chars": 2}, {"sum_logits": -1.7236900329589844, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.7236900329589844, "logits_per_char": -0.8618450164794922, "bits_per_byte": 1.2433795312907236, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 252, "native_id": "Mercury_SC_405640", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3964924812316895, "logits_per_token_corr": -1.3964924812316895, "logits_per_char_corr": -0.6982462406158447, "bits_per_byte_corr": 1.0073563886565648}, "model_output": [{"sum_logits": -1.3964924812316895, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.3964924812316895, "logits_per_char": -0.6982462406158447, "bits_per_byte": 1.0073563886565648, "num_chars": 2}, {"sum_logits": -1.2187905311584473, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.2187905311584473, "logits_per_char": -0.6093952655792236, "bits_per_byte": 0.8791715275929903, "num_chars": 2}, {"sum_logits": -1.2344822883605957, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.2344822883605957, "logits_per_char": -0.6172411441802979, "bits_per_byte": 0.8904907377421851, "num_chars": 2}, {"sum_logits": -1.9237942695617676, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.9237942695617676, "logits_per_char": -0.9618971347808838, "bits_per_byte": 1.387724226194681, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 253, "native_id": "Mercury_7201775", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5661189556121826, "logits_per_token_corr": -1.5661189556121826, "logits_per_char_corr": -0.7830594778060913, "bits_per_byte_corr": 1.129716025352761}, "model_output": [{"sum_logits": -1.3157241344451904, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.3157241344451904, "logits_per_char": -0.6578620672225952, "bits_per_byte": 0.9490943419716819, "num_chars": 2}, {"sum_logits": -1.3993384838104248, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3993384838104248, "logits_per_char": -0.6996692419052124, "bits_per_byte": 1.0094093455599156, "num_chars": 2}, {"sum_logits": -1.3348300457000732, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3348300457000732, "logits_per_char": -0.6674150228500366, "bits_per_byte": 0.9628763436812338, "num_chars": 2}, {"sum_logits": -1.5661189556121826, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.5661189556121826, "logits_per_char": -0.7830594778060913, "bits_per_byte": 1.129716025352761, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 254, "native_id": "Mercury_7177398", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7891825437545776, "logits_per_token_corr": -1.7891825437545776, "logits_per_char_corr": -0.8945912718772888, "bits_per_byte_corr": 1.2906223915608421}, "model_output": [{"sum_logits": -1.3982924222946167, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.3982924222946167, "logits_per_char": -0.6991462111473083, "bits_per_byte": 1.0086547716792544, "num_chars": 2}, {"sum_logits": -1.1967791318893433, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.1967791318893433, "logits_per_char": -0.5983895659446716, "bits_per_byte": 0.8632936593086973, "num_chars": 2}, {"sum_logits": -1.3164137601852417, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.3164137601852417, "logits_per_char": -0.6582068800926208, "bits_per_byte": 0.9495918017893029, "num_chars": 2}, {"sum_logits": -1.7891825437545776, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.7891825437545776, "logits_per_char": -0.8945912718772888, "bits_per_byte": 1.2906223915608421, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 255, "native_id": "Mercury_7041423", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5642046928405762, "logits_per_token_corr": -1.5642046928405762, "logits_per_char_corr": -0.7821023464202881, "bits_per_byte_corr": 1.1283351766489824}, "model_output": [{"sum_logits": -1.5642046928405762, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.5642046928405762, "logits_per_char": -0.7821023464202881, "bits_per_byte": 1.1283351766489824, "num_chars": 2}, {"sum_logits": -1.2216696739196777, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.2216696739196777, "logits_per_char": -0.6108348369598389, "bits_per_byte": 0.8812483900848112, "num_chars": 2}, {"sum_logits": -1.1995692253112793, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.1995692253112793, "logits_per_char": -0.5997846126556396, "bits_per_byte": 0.8653062862804208, "num_chars": 2}, {"sum_logits": -1.7564921379089355, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.7564921379089355, "logits_per_char": -0.8782460689544678, "bits_per_byte": 1.2670412483617475, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 256, "native_id": "Mercury_7004743", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8407065868377686, "logits_per_token_corr": -1.8407065868377686, "logits_per_char_corr": -0.9203532934188843, "bits_per_byte_corr": 1.3277891322822033}, "model_output": [{"sum_logits": -1.103905439376831, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.103905439376831, "logits_per_char": -0.5519527196884155, "bits_per_byte": 0.7962994515002253, "num_chars": 2}, {"sum_logits": -1.3753249645233154, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3753249645233154, "logits_per_char": -0.6876624822616577, "bits_per_byte": 0.9920872529650011, "num_chars": 2}, {"sum_logits": -1.5275390148162842, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.5275390148162842, "logits_per_char": -0.7637695074081421, "bits_per_byte": 1.1018864807207247, "num_chars": 2}, {"sum_logits": -1.8407065868377686, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.8407065868377686, "logits_per_char": -0.9203532934188843, "bits_per_byte": 1.3277891322822033, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 257, "native_id": "Mercury_7198468", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5832079648971558, "logits_per_token_corr": -1.5832079648971558, "logits_per_char_corr": -0.7916039824485779, "bits_per_byte_corr": 1.1420431398273378}, "model_output": [{"sum_logits": -1.5832079648971558, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.5832079648971558, "logits_per_char": -0.7916039824485779, "bits_per_byte": 1.1420431398273378, "num_chars": 2}, {"sum_logits": -1.0233649015426636, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": true, "logits_per_token": -1.0233649015426636, "logits_per_char": -0.5116824507713318, "bits_per_byte": 0.7382017342382419, "num_chars": 2}, {"sum_logits": -1.2484415769577026, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.2484415769577026, "logits_per_char": -0.6242207884788513, "bits_per_byte": 0.9005602359588843, "num_chars": 2}, {"sum_logits": -2.041444778442383, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -2.041444778442383, "logits_per_char": -1.0207223892211914, "bits_per_byte": 1.472591129054805, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 258, "native_id": "MEA_2014_5_11", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.255642056465149, "logits_per_token_corr": -1.255642056465149, "logits_per_char_corr": -0.6278210282325745, "bits_per_byte_corr": 0.9057542839975958}, "model_output": [{"sum_logits": -1.4671193361282349, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4671193361282349, "logits_per_char": -0.7335596680641174, "bits_per_byte": 1.0583028953130167, "num_chars": 2}, {"sum_logits": -1.069158673286438, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.069158673286438, "logits_per_char": -0.534579336643219, "bits_per_byte": 0.7712349579374378, "num_chars": 2}, {"sum_logits": -1.255642056465149, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.255642056465149, "logits_per_char": -0.6278210282325745, "bits_per_byte": 0.9057542839975958, "num_chars": 2}, {"sum_logits": -2.0099010467529297, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -2.0099010467529297, "logits_per_char": -1.0049505233764648, "bits_per_byte": 1.4498371364150358, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 259, "native_id": "Mercury_410602", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5602484941482544, "logits_per_token_corr": -1.5602484941482544, "logits_per_char_corr": -0.7801242470741272, "bits_per_byte_corr": 1.1254813825318883}, "model_output": [{"sum_logits": -1.5602484941482544, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.5602484941482544, "logits_per_char": -0.7801242470741272, "bits_per_byte": 1.1254813825318883, "num_chars": 2}, {"sum_logits": -1.4564448595046997, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4564448595046997, "logits_per_char": -0.7282224297523499, "bits_per_byte": 1.0506028880685814, "num_chars": 2}, {"sum_logits": -1.3173495531082153, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.3173495531082153, "logits_per_char": -0.6586747765541077, "bits_per_byte": 0.9502668336939399, "num_chars": 2}, {"sum_logits": -1.6260043382644653, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.6260043382644653, "logits_per_char": -0.8130021691322327, "bits_per_byte": 1.172914197639885, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 260, "native_id": "Mercury_7108868", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3436768054962158, "logits_per_token_corr": -1.3436768054962158, "logits_per_char_corr": -0.6718384027481079, "bits_per_byte_corr": 0.9692579319241538}, "model_output": [{"sum_logits": -1.6670372486114502, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.6670372486114502, "logits_per_char": -0.8335186243057251, "bits_per_byte": 1.2025131857753246, "num_chars": 2}, {"sum_logits": -1.0862419605255127, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.0862419605255127, "logits_per_char": -0.5431209802627563, "bits_per_byte": 0.7835579448283941, "num_chars": 2}, {"sum_logits": -1.3436768054962158, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3436768054962158, "logits_per_char": -0.6718384027481079, "bits_per_byte": 0.9692579319241538, "num_chars": 2}, {"sum_logits": -1.6152775287628174, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.6152775287628174, "logits_per_char": -0.8076387643814087, "bits_per_byte": 1.1651764402035854, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 261, "native_id": "Mercury_7033828", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6925246715545654, "logits_per_token_corr": -1.6925246715545654, "logits_per_char_corr": -0.8462623357772827, "bits_per_byte_corr": 1.2208984751178737}, "model_output": [{"sum_logits": -1.2459285259246826, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.2459285259246826, "logits_per_char": -0.6229642629623413, "bits_per_byte": 0.8987474528274636, "num_chars": 2}, {"sum_logits": -1.3150336742401123, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3150336742401123, "logits_per_char": -0.6575168371200562, "bits_per_byte": 0.9485962802147828, "num_chars": 2}, {"sum_logits": -1.4352748394012451, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4352748394012451, "logits_per_char": -0.7176374197006226, "bits_per_byte": 1.0353319465591837, "num_chars": 2}, {"sum_logits": -1.6925246715545654, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.6925246715545654, "logits_per_char": -0.8462623357772827, "bits_per_byte": 1.2208984751178737, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 262, "native_id": "TIMSS_2007_4_pg19", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4276982545852661, "logits_per_token_corr": -1.4276982545852661, "logits_per_char_corr": -0.7138491272926331, "bits_per_byte_corr": 1.029866595888736}, "model_output": [{"sum_logits": -1.4276982545852661, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4276982545852661, "logits_per_char": -0.7138491272926331, "bits_per_byte": 1.029866595888736, "num_chars": 2}, {"sum_logits": -1.173660397529602, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.173660397529602, "logits_per_char": -0.586830198764801, "bits_per_byte": 0.8466170176024714, "num_chars": 2}, {"sum_logits": -1.1336880922317505, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.1336880922317505, "logits_per_char": -0.5668440461158752, "bits_per_byte": 0.8177830942893957, "num_chars": 2}, {"sum_logits": -2.137786865234375, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -2.137786865234375, "logits_per_char": -1.0688934326171875, "bits_per_byte": 1.5420872544767057, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 263, "native_id": "Mercury_400828", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7688343524932861, "logits_per_token_corr": -1.7688343524932861, "logits_per_char_corr": -0.8844171762466431, "bits_per_byte_corr": 1.275944274248969}, "model_output": [{"sum_logits": -1.5640480518341064, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.5640480518341064, "logits_per_char": -0.7820240259170532, "bits_per_byte": 1.1282221840473654, "num_chars": 2}, {"sum_logits": -1.4035942554473877, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4035942554473877, "logits_per_char": -0.7017971277236938, "bits_per_byte": 1.012479235877819, "num_chars": 2}, {"sum_logits": -1.090358018875122, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.090358018875122, "logits_per_char": -0.545179009437561, "bits_per_byte": 0.7865270533128917, "num_chars": 2}, {"sum_logits": -1.7688343524932861, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.7688343524932861, "logits_per_char": -0.8844171762466431, "bits_per_byte": 1.275944274248969, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 264, "native_id": "VASoL_2008_3_16", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.244281530380249, "logits_per_token_corr": -1.244281530380249, "logits_per_char_corr": -0.6221407651901245, "bits_per_byte_corr": 0.8975593966753025}, "model_output": [{"sum_logits": -1.5393617153167725, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.5393617153167725, "logits_per_char": -0.7696808576583862, "bits_per_byte": 1.1104147564117157, "num_chars": 2}, {"sum_logits": -1.244281530380249, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.244281530380249, "logits_per_char": -0.6221407651901245, "bits_per_byte": 0.8975593966753025, "num_chars": 2}, {"sum_logits": -1.2302587032318115, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.2302587032318115, "logits_per_char": -0.6151293516159058, "bits_per_byte": 0.8874440650821482, "num_chars": 2}, {"sum_logits": -1.657438039779663, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.657438039779663, "logits_per_char": -0.8287190198898315, "bits_per_byte": 1.1955888202862812, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 265, "native_id": "LEAP__5_10315", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3567029237747192, "logits_per_token_corr": -1.3567029237747192, "logits_per_char_corr": -0.6783514618873596, "bits_per_byte_corr": 0.9786542900453755}, "model_output": [{"sum_logits": -1.2782498598098755, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.2782498598098755, "logits_per_char": -0.6391249299049377, "bits_per_byte": 0.9220623668830225, "num_chars": 2}, {"sum_logits": -1.144802212715149, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.144802212715149, "logits_per_char": -0.5724011063575745, "bits_per_byte": 0.825800237542022, "num_chars": 2}, {"sum_logits": -1.3567029237747192, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.3567029237747192, "logits_per_char": -0.6783514618873596, "bits_per_byte": 0.9786542900453755, "num_chars": 2}, {"sum_logits": -2.015289306640625, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -2.015289306640625, "logits_per_char": -1.0076446533203125, "bits_per_byte": 1.453723944324538, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 266, "native_id": "Mercury_SC_415471", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2016115188598633, "logits_per_token_corr": -1.2016115188598633, "logits_per_char_corr": -0.6008057594299316, "bits_per_byte_corr": 0.8667794896677127}, "model_output": [{"sum_logits": -1.2016115188598633, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.2016115188598633, "logits_per_char": -0.6008057594299316, "bits_per_byte": 0.8667794896677127, "num_chars": 2}, {"sum_logits": -1.1579389572143555, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.1579389572143555, "logits_per_char": -0.5789694786071777, "bits_per_byte": 0.8352763956132442, "num_chars": 2}, {"sum_logits": -1.380049705505371, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.380049705505371, "logits_per_char": -0.6900248527526855, "bits_per_byte": 0.9954954331571518, "num_chars": 2}, {"sum_logits": -2.0982179641723633, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -2.0982179641723633, "logits_per_char": -1.0491089820861816, "bits_per_byte": 1.51354432580889, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 267, "native_id": "Mercury_7247065", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4202390909194946, "logits_per_token_corr": -1.4202390909194946, "logits_per_char_corr": -0.7101195454597473, "bits_per_byte_corr": 1.0244859466738383}, "model_output": [{"sum_logits": -1.431069016456604, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.431069016456604, "logits_per_char": -0.715534508228302, "bits_per_byte": 1.032298086606636, "num_chars": 2}, {"sum_logits": -1.0947309732437134, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.0947309732437134, "logits_per_char": -0.5473654866218567, "bits_per_byte": 0.7896814731036942, "num_chars": 2}, {"sum_logits": -1.4202390909194946, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4202390909194946, "logits_per_char": -0.7101195454597473, "bits_per_byte": 1.0244859466738383, "num_chars": 2}, {"sum_logits": -1.7910429239273071, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.7910429239273071, "logits_per_char": -0.8955214619636536, "bits_per_byte": 1.2919643721855256, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 268, "native_id": "MDSA_2011_5_3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1373405456542969, "logits_per_token_corr": -1.1373405456542969, "logits_per_char_corr": -0.5686702728271484, "bits_per_byte_corr": 0.8204177825092903}, "model_output": [{"sum_logits": -1.6681251525878906, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.6681251525878906, "logits_per_char": -0.8340625762939453, "bits_per_byte": 1.203297942611212, "num_chars": 2}, {"sum_logits": -1.1373405456542969, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.1373405456542969, "logits_per_char": -0.5686702728271484, "bits_per_byte": 0.8204177825092903, "num_chars": 2}, {"sum_logits": -1.3479461669921875, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.3479461669921875, "logits_per_char": -0.6739730834960938, "bits_per_byte": 0.9723376252531563, "num_chars": 2}, {"sum_logits": -1.5870685577392578, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.5870685577392578, "logits_per_char": -0.7935342788696289, "bits_per_byte": 1.1448279689014358, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 269, "native_id": "MDSA_2009_5_39", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.377366542816162, "logits_per_token_corr": -1.377366542816162, "logits_per_char_corr": -0.688683271408081, "bits_per_byte_corr": 0.9935599404043404}, "model_output": [{"sum_logits": -1.2246432304382324, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": true, "logits_per_token": -1.2246432304382324, "logits_per_char": -0.6123216152191162, "bits_per_byte": 0.8833933577063736, "num_chars": 2}, {"sum_logits": -1.3935694694519043, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": false, "logits_per_token": -1.3935694694519043, "logits_per_char": -0.6967847347259521, "bits_per_byte": 1.0052478813569852, "num_chars": 2}, {"sum_logits": -1.377366542816162, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": false, "logits_per_token": -1.377366542816162, "logits_per_char": -0.688683271408081, "bits_per_byte": 0.9935599404043404, "num_chars": 2}, {"sum_logits": -1.6908278465270996, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": false, "logits_per_token": -1.6908278465270996, "logits_per_char": -0.8454139232635498, "bits_per_byte": 1.2196744745916823, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 270, "native_id": "Mercury_187198", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2533398866653442, "logits_per_token_corr": -1.2533398866653442, "logits_per_char_corr": -0.6266699433326721, "bits_per_byte_corr": 0.9040936195208633}, "model_output": [{"sum_logits": -1.4219971895217896, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4219971895217896, "logits_per_char": -0.7109985947608948, "bits_per_byte": 1.0257541467413016, "num_chars": 2}, {"sum_logits": -1.1756192445755005, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.1756192445755005, "logits_per_char": -0.5878096222877502, "bits_per_byte": 0.8480300270619612, "num_chars": 2}, {"sum_logits": -1.2533398866653442, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.2533398866653442, "logits_per_char": -0.6266699433326721, "bits_per_byte": 0.9040936195208633, "num_chars": 2}, {"sum_logits": -1.9085561037063599, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.9085561037063599, "logits_per_char": -0.9542780518531799, "bits_per_byte": 1.376732213038753, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 271, "native_id": "MCAS_2000_4_36", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4398741722106934, "logits_per_token_corr": -1.4398741722106934, "logits_per_char_corr": -0.7199370861053467, "bits_per_byte_corr": 1.0386496638769804}, "model_output": [{"sum_logits": -1.4398741722106934, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4398741722106934, "logits_per_char": -0.7199370861053467, "bits_per_byte": 1.0386496638769804, "num_chars": 2}, {"sum_logits": -1.176846981048584, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.176846981048584, "logits_per_char": -0.588423490524292, "bits_per_byte": 0.8489156517225799, "num_chars": 2}, {"sum_logits": -1.5097079277038574, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.5097079277038574, "logits_per_char": -0.7548539638519287, "bits_per_byte": 1.0890240702453369, "num_chars": 2}, {"sum_logits": -1.5271239280700684, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.5271239280700684, "logits_per_char": -0.7635619640350342, "bits_per_byte": 1.1015870589255723, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 272, "native_id": "Mercury_184100", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2819850444793701, "logits_per_token_corr": -1.2819850444793701, "logits_per_char_corr": -0.6409925222396851, "bits_per_byte_corr": 0.9247567330827666}, "model_output": [{"sum_logits": -1.442213773727417, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.442213773727417, "logits_per_char": -0.7211068868637085, "bits_per_byte": 1.0403373296298986, "num_chars": 2}, {"sum_logits": -1.2143409252166748, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.2143409252166748, "logits_per_char": -0.6071704626083374, "bits_per_byte": 0.8759618153799354, "num_chars": 2}, {"sum_logits": -1.2819850444793701, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.2819850444793701, "logits_per_char": -0.6409925222396851, "bits_per_byte": 0.9247567330827666, "num_chars": 2}, {"sum_logits": -1.749596357345581, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.749596357345581, "logits_per_char": -0.8747981786727905, "bits_per_byte": 1.262066994150839, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 273, "native_id": "Mercury_LBS10814", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1342127323150635, "logits_per_token_corr": -1.1342127323150635, "logits_per_char_corr": -0.5671063661575317, "bits_per_byte_corr": 0.8181615421126195}, "model_output": [{"sum_logits": -1.225628137588501, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.225628137588501, "logits_per_char": -0.6128140687942505, "bits_per_byte": 0.8841038180370884, "num_chars": 2}, {"sum_logits": -1.1342127323150635, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.1342127323150635, "logits_per_char": -0.5671063661575317, "bits_per_byte": 0.8181615421126195, "num_chars": 2}, {"sum_logits": -1.5214827060699463, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5214827060699463, "logits_per_char": -0.7607413530349731, "bits_per_byte": 1.0975177774235043, "num_chars": 2}, {"sum_logits": -1.908571481704712, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.908571481704712, "logits_per_char": -0.954285740852356, "bits_per_byte": 1.3767433059197336, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 274, "native_id": "Mercury_SC_408384", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1956157684326172, "logits_per_token_corr": -1.1956157684326172, "logits_per_char_corr": -0.5978078842163086, "bits_per_byte_corr": 0.8624544699638117}, "model_output": [{"sum_logits": -0.9203033447265625, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -0.9203033447265625, "logits_per_char": -0.46015167236328125, "bits_per_byte": 0.6638585357757458, "num_chars": 2}, {"sum_logits": -1.1956157684326172, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.1956157684326172, "logits_per_char": -0.5978078842163086, "bits_per_byte": 0.8624544699638117, "num_chars": 2}, {"sum_logits": -1.6561298370361328, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.6561298370361328, "logits_per_char": -0.8280649185180664, "bits_per_byte": 1.1946451514809961, "num_chars": 2}, {"sum_logits": -2.3260631561279297, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -2.3260631561279297, "logits_per_char": -1.1630315780639648, "bits_per_byte": 1.6778998900713529, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 275, "native_id": "Mercury_7043068", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6400166749954224, "logits_per_token_corr": -1.6400166749954224, "logits_per_char_corr": -0.8200083374977112, "bits_per_byte_corr": 1.1830219619964013}, "model_output": [{"sum_logits": -1.6346646547317505, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.6346646547317505, "logits_per_char": -0.8173323273658752, "bits_per_byte": 1.17916129544983, "num_chars": 2}, {"sum_logits": -1.1953755617141724, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": true, "logits_per_token": -1.1953755617141724, "logits_per_char": -0.5976877808570862, "bits_per_byte": 0.8622811974430672, "num_chars": 2}, {"sum_logits": -1.2346097230911255, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.2346097230911255, "logits_per_char": -0.6173048615455627, "bits_per_byte": 0.8905826624690714, "num_chars": 2}, {"sum_logits": -1.6400166749954224, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.6400166749954224, "logits_per_char": -0.8200083374977112, "bits_per_byte": 1.1830219619964013, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 276, "native_id": "Mercury_411071", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.450232982635498, "logits_per_token_corr": -1.450232982635498, "logits_per_char_corr": -0.725116491317749, "bits_per_byte_corr": 1.0461219660916732}, "model_output": [{"sum_logits": -1.5806212425231934, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5806212425231934, "logits_per_char": -0.7903106212615967, "bits_per_byte": 1.1401772140568005, "num_chars": 2}, {"sum_logits": -1.450232982635498, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.450232982635498, "logits_per_char": -0.725116491317749, "bits_per_byte": 1.0461219660916732, "num_chars": 2}, {"sum_logits": -1.2651305198669434, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2651305198669434, "logits_per_char": -0.6325652599334717, "bits_per_byte": 0.9125987635453133, "num_chars": 2}, {"sum_logits": -1.4057135581970215, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4057135581970215, "logits_per_char": -0.7028567790985107, "bits_per_byte": 1.0140079896613396, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 277, "native_id": "NYSEDREGENTS_2010_4_24", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1883912086486816, "logits_per_token_corr": -1.1883912086486816, "logits_per_char_corr": -0.5941956043243408, "bits_per_byte_corr": 0.857243051677363}, "model_output": [{"sum_logits": -1.5239129066467285, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.5239129066467285, "logits_per_char": -0.7619564533233643, "bits_per_byte": 1.0992707965837503, "num_chars": 2}, {"sum_logits": -1.1883912086486816, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.1883912086486816, "logits_per_char": -0.5941956043243408, "bits_per_byte": 0.857243051677363, "num_chars": 2}, {"sum_logits": -1.2261815071105957, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.2261815071105957, "logits_per_char": -0.6130907535552979, "bits_per_byte": 0.8845029897697413, "num_chars": 2}, {"sum_logits": -1.7528462409973145, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.7528462409973145, "logits_per_char": -0.8764231204986572, "bits_per_byte": 1.2644112896647517, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 278, "native_id": "Mercury_SC_409673", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.031386613845825, "logits_per_token_corr": -2.031386613845825, "logits_per_char_corr": -1.0156933069229126, "bits_per_byte_corr": 1.4653356969628506}, "model_output": [{"sum_logits": -1.2964813709259033, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.2964813709259033, "logits_per_char": -0.6482406854629517, "bits_per_byte": 0.9352136222205346, "num_chars": 2}, {"sum_logits": -1.0900375843048096, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -1.0900375843048096, "logits_per_char": -0.5450187921524048, "bits_per_byte": 0.786295908630132, "num_chars": 2}, {"sum_logits": -1.4035656452178955, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4035656452178955, "logits_per_char": -0.7017828226089478, "bits_per_byte": 1.0124585979597154, "num_chars": 2}, {"sum_logits": -2.031386613845825, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -2.031386613845825, "logits_per_char": -1.0156933069229126, "bits_per_byte": 1.4653356969628506, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 279, "native_id": "Mercury_SC_400374", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.275854468345642, "logits_per_token_corr": -1.275854468345642, "logits_per_char_corr": -0.637927234172821, "bits_per_byte_corr": 0.9203344571898026}, "model_output": [{"sum_logits": -1.3496726751327515, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3496726751327515, "logits_per_char": -0.6748363375663757, "bits_per_byte": 0.9735830376193803, "num_chars": 2}, {"sum_logits": -1.275854468345642, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.275854468345642, "logits_per_char": -0.637927234172821, "bits_per_byte": 0.9203344571898026, "num_chars": 2}, {"sum_logits": -1.4584354162216187, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4584354162216187, "logits_per_char": -0.7292177081108093, "bits_per_byte": 1.052038771220636, "num_chars": 2}, {"sum_logits": -1.541540503501892, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.541540503501892, "logits_per_char": -0.770770251750946, "bits_per_byte": 1.1119864198666267, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 280, "native_id": "CSZ_2009_8_CSZ20740", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3681516647338867, "logits_per_token_corr": -1.3681516647338867, "logits_per_char_corr": -0.6840758323669434, "bits_per_byte_corr": 0.986912810948488}, "model_output": [{"sum_logits": -1.3618364334106445, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3618364334106445, "logits_per_char": -0.6809182167053223, "bits_per_byte": 0.9823573344924307, "num_chars": 2}, {"sum_logits": -1.3681516647338867, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3681516647338867, "logits_per_char": -0.6840758323669434, "bits_per_byte": 0.986912810948488, "num_chars": 2}, {"sum_logits": -1.3452444076538086, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.3452444076538086, "logits_per_char": -0.6726222038269043, "bits_per_byte": 0.9703887178535775, "num_chars": 2}, {"sum_logits": -1.7686777114868164, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.7686777114868164, "logits_per_char": -0.8843388557434082, "bits_per_byte": 1.275831281647352, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 281, "native_id": "Mercury_SC_406482", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.675094723701477, "logits_per_token_corr": -1.675094723701477, "logits_per_char_corr": -0.8375473618507385, "bits_per_byte_corr": 1.2083254254525628}, "model_output": [{"sum_logits": -1.5588322877883911, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.5588322877883911, "logits_per_char": -0.7794161438941956, "bits_per_byte": 1.1244598055857624, "num_chars": 2}, {"sum_logits": -1.1850262880325317, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.1850262880325317, "logits_per_char": -0.5925131440162659, "bits_per_byte": 0.854815774534409, "num_chars": 2}, {"sum_logits": -1.2652684450149536, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.2652684450149536, "logits_per_char": -0.6326342225074768, "bits_per_byte": 0.9126982555088375, "num_chars": 2}, {"sum_logits": -1.675094723701477, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.675094723701477, "logits_per_char": -0.8375473618507385, "bits_per_byte": 1.2083254254525628, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 282, "native_id": "OHAT_2007_8_24", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2748178243637085, "logits_per_token_corr": -1.2748178243637085, "logits_per_char_corr": -0.6374089121818542, "bits_per_byte_corr": 0.9195866766238505}, "model_output": [{"sum_logits": -1.2748178243637085, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.2748178243637085, "logits_per_char": -0.6374089121818542, "bits_per_byte": 0.9195866766238505, "num_chars": 2}, {"sum_logits": -0.9933522939682007, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -0.9933522939682007, "logits_per_char": -0.49667614698410034, "bits_per_byte": 0.7165522141823142, "num_chars": 2}, {"sum_logits": -1.4958852529525757, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4958852529525757, "logits_per_char": -0.7479426264762878, "bits_per_byte": 1.079053118087582, "num_chars": 2}, {"sum_logits": -2.1359400749206543, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -2.1359400749206543, "logits_per_char": -1.0679700374603271, "bits_per_byte": 1.5407550768631213, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 283, "native_id": "Mercury_188335", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0477890968322754, "logits_per_token_corr": -1.0477890968322754, "logits_per_char_corr": -0.5238945484161377, "bits_per_byte_corr": 0.7558200669492678}, "model_output": [{"sum_logits": -1.2219452857971191, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.2219452857971191, "logits_per_char": -0.6109726428985596, "bits_per_byte": 0.8814472020292087, "num_chars": 2}, {"sum_logits": -1.0477890968322754, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.0477890968322754, "logits_per_char": -0.5238945484161377, "bits_per_byte": 0.7558200669492678, "num_chars": 2}, {"sum_logits": -1.6222176551818848, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.6222176551818848, "logits_per_char": -0.8111088275909424, "bits_per_byte": 1.1701826831875546, "num_chars": 2}, {"sum_logits": -1.953026294708252, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.953026294708252, "logits_per_char": -0.976513147354126, "bits_per_byte": 1.4088106750516833, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 284, "native_id": "Mercury_7128555", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.9178699254989624, "logits_per_token_corr": -0.9178699254989624, "logits_per_char_corr": -0.4589349627494812, "bits_per_byte_corr": 0.6621031948497134}, "model_output": [{"sum_logits": -1.5998233556747437, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.5998233556747437, "logits_per_char": -0.7999116778373718, "bits_per_byte": 1.1540286107659756, "num_chars": 2}, {"sum_logits": -0.9178699254989624, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": true, "logits_per_token": -0.9178699254989624, "logits_per_char": -0.4589349627494812, "bits_per_byte": 0.6621031948497134, "num_chars": 2}, {"sum_logits": -1.423712134361267, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.423712134361267, "logits_per_char": -0.7118560671806335, "bits_per_byte": 1.0269912179489586, "num_chars": 2}, {"sum_logits": -1.9128135442733765, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.9128135442733765, "logits_per_char": -0.9564067721366882, "bits_per_byte": 1.3798033072352123, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 285, "native_id": "Mercury_407517", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3126018047332764, "logits_per_token_corr": -1.3126018047332764, "logits_per_char_corr": -0.6563009023666382, "bits_per_byte_corr": 0.9468420571759809}, "model_output": [{"sum_logits": -1.395085096359253, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.395085096359253, "logits_per_char": -0.6975425481796265, "bits_per_byte": 1.006341175068521, "num_chars": 2}, {"sum_logits": -1.4907519817352295, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4907519817352295, "logits_per_char": -0.7453759908676147, "bits_per_byte": 1.0753502456231776, "num_chars": 2}, {"sum_logits": -1.3126018047332764, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.3126018047332764, "logits_per_char": -0.6563009023666382, "bits_per_byte": 0.9468420571759809, "num_chars": 2}, {"sum_logits": -1.9417731761932373, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.9417731761932373, "logits_per_char": -0.9708865880966187, "bits_per_byte": 1.4006932659136038, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 286, "native_id": "Mercury_405950", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5486751794815063, "logits_per_token_corr": -1.5486751794815063, "logits_per_char_corr": -0.7743375897407532, "bits_per_byte_corr": 1.1171330006937}, "model_output": [{"sum_logits": -1.1111980676651, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.1111980676651, "logits_per_char": -0.55559903383255, "bits_per_byte": 0.8015599708334952, "num_chars": 2}, {"sum_logits": -1.1084991693496704, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.1084991693496704, "logits_per_char": -0.5542495846748352, "bits_per_byte": 0.7996131272257269, "num_chars": 2}, {"sum_logits": -1.5486751794815063, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.5486751794815063, "logits_per_char": -0.7743375897407532, "bits_per_byte": 1.1171330006937, "num_chars": 2}, {"sum_logits": -2.1311755180358887, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -2.1311755180358887, "logits_per_char": -1.0655877590179443, "bits_per_byte": 1.5373181755682765, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 287, "native_id": "MCAS_2004_9_5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3395605087280273, "logits_per_token_corr": -1.3395605087280273, "logits_per_char_corr": -0.6697802543640137, "bits_per_byte_corr": 0.9662886514570053}, "model_output": [{"sum_logits": -1.3413724899291992, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3413724899291992, "logits_per_char": -0.6706862449645996, "bits_per_byte": 0.9675957196035635, "num_chars": 2}, {"sum_logits": -1.2010889053344727, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.2010889053344727, "logits_per_char": -0.6005444526672363, "bits_per_byte": 0.8664025036970211, "num_chars": 2}, {"sum_logits": -1.3395605087280273, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3395605087280273, "logits_per_char": -0.6697802543640137, "bits_per_byte": 0.9662886514570053, "num_chars": 2}, {"sum_logits": -1.791031837463379, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.791031837463379, "logits_per_char": -0.8955159187316895, "bits_per_byte": 1.2919563749922605, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 288, "native_id": "NCEOGA_2013_8_28", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.8919023871421814, "logits_per_token_corr": -0.8919023871421814, "logits_per_char_corr": -0.4459511935710907, "bits_per_byte_corr": 0.6433715754439889}, "model_output": [{"sum_logits": -1.338965892791748, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.338965892791748, "logits_per_char": -0.669482946395874, "bits_per_byte": 0.965859726725753, "num_chars": 2}, {"sum_logits": -0.8919023871421814, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -0.8919023871421814, "logits_per_char": -0.4459511935710907, "bits_per_byte": 0.6433715754439889, "num_chars": 2}, {"sum_logits": -1.5759425163269043, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.5759425163269043, "logits_per_char": -0.7879712581634521, "bits_per_byte": 1.1368022265162663, "num_chars": 2}, {"sum_logits": -2.178515911102295, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -2.178515911102295, "logits_per_char": -1.0892579555511475, "bits_per_byte": 1.5714670507236204, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 289, "native_id": "Mercury_SC_406451", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.51483154296875, "logits_per_token_corr": -1.51483154296875, "logits_per_char_corr": -0.757415771484375, "bits_per_byte_corr": 1.0927199774123812}, "model_output": [{"sum_logits": -1.292715072631836, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.292715072631836, "logits_per_char": -0.646357536315918, "bits_per_byte": 0.9324968122848528, "num_chars": 2}, {"sum_logits": -1.0365638732910156, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": true, "logits_per_token": -1.0365638732910156, "logits_per_char": -0.5182819366455078, "bits_per_byte": 0.7477227797813393, "num_chars": 2}, {"sum_logits": -1.51483154296875, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.51483154296875, "logits_per_char": -0.757415771484375, "bits_per_byte": 1.0927199774123812, "num_chars": 2}, {"sum_logits": -1.9582881927490234, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.9582881927490234, "logits_per_char": -0.9791440963745117, "bits_per_byte": 1.412606332156228, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 290, "native_id": "Mercury_7109323", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8194913864135742, "logits_per_token_corr": -1.8194913864135742, "logits_per_char_corr": -0.9097456932067871, "bits_per_byte_corr": 1.312485600060467}, "model_output": [{"sum_logits": -1.4570016860961914, "num_tokens": 1, "num_tokens_all": 424, "is_greedy": false, "logits_per_token": -1.4570016860961914, "logits_per_char": -0.7285008430480957, "bits_per_byte": 1.051004553549672, "num_chars": 2}, {"sum_logits": -0.9824323654174805, "num_tokens": 1, "num_tokens_all": 424, "is_greedy": true, "logits_per_token": -0.9824323654174805, "logits_per_char": -0.49121618270874023, "bits_per_byte": 0.7086751507988156, "num_chars": 2}, {"sum_logits": -1.524540901184082, "num_tokens": 1, "num_tokens_all": 424, "is_greedy": false, "logits_per_token": -1.524540901184082, "logits_per_char": -0.762270450592041, "bits_per_byte": 1.0997237988861233, "num_chars": 2}, {"sum_logits": -1.8194913864135742, "num_tokens": 1, "num_tokens_all": 424, "is_greedy": false, "logits_per_token": -1.8194913864135742, "logits_per_char": -0.9097456932067871, "bits_per_byte": 1.312485600060467, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 291, "native_id": "Mercury_404132", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1430394649505615, "logits_per_token_corr": -1.1430394649505615, "logits_per_char_corr": -0.5715197324752808, "bits_per_byte_corr": 0.8245286838128669}, "model_output": [{"sum_logits": -1.6014974117279053, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.6014974117279053, "logits_per_char": -0.8007487058639526, "bits_per_byte": 1.1552361869490095, "num_chars": 2}, {"sum_logits": -1.1430394649505615, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": true, "logits_per_token": -1.1430394649505615, "logits_per_char": -0.5715197324752808, "bits_per_byte": 0.8245286838128669, "num_chars": 2}, {"sum_logits": -1.244903326034546, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.244903326034546, "logits_per_char": -0.622451663017273, "bits_per_byte": 0.898007927428753, "num_chars": 2}, {"sum_logits": -1.7568891048431396, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.7568891048431396, "logits_per_char": -0.8784445524215698, "bits_per_byte": 1.2673275994754343, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 292, "native_id": "Mercury_7210210", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4624252319335938, "logits_per_token_corr": -1.4624252319335938, "logits_per_char_corr": -0.7312126159667969, "bits_per_byte_corr": 1.0549168148915018}, "model_output": [{"sum_logits": -1.216165542602539, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.216165542602539, "logits_per_char": -0.6080827713012695, "bits_per_byte": 0.8772779986069895, "num_chars": 2}, {"sum_logits": -1.216257095336914, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.216257095336914, "logits_per_char": -0.608128547668457, "bits_per_byte": 0.8773440399449208, "num_chars": 2}, {"sum_logits": -1.4624252319335938, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4624252319335938, "logits_per_char": -0.7312126159667969, "bits_per_byte": 1.0549168148915018, "num_chars": 2}, {"sum_logits": -1.8098869323730469, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.8098869323730469, "logits_per_char": -0.9049434661865234, "bits_per_byte": 1.3055574509531047, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 293, "native_id": "Mercury_SC_408042", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0643060207366943, "logits_per_token_corr": -1.0643060207366943, "logits_per_char_corr": -0.5321530103683472, "bits_per_byte_corr": 0.7677345090530991}, "model_output": [{"sum_logits": -1.3358895778656006, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3358895778656006, "logits_per_char": -0.6679447889328003, "bits_per_byte": 0.9636406345816687, "num_chars": 2}, {"sum_logits": -1.0643060207366943, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.0643060207366943, "logits_per_char": -0.5321530103683472, "bits_per_byte": 0.7677345090530991, "num_chars": 2}, {"sum_logits": -1.5228803157806396, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.5228803157806396, "logits_per_char": -0.7614401578903198, "bits_per_byte": 1.0985259397228628, "num_chars": 2}, {"sum_logits": -1.8920495510101318, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.8920495510101318, "logits_per_char": -0.9460247755050659, "bits_per_byte": 1.364825252180234, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 294, "native_id": "MCAS_2004_8_14", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3954561948776245, "logits_per_token_corr": -1.3954561948776245, "logits_per_char_corr": -0.6977280974388123, "bits_per_byte_corr": 1.006608866064589}, "model_output": [{"sum_logits": -1.3429049253463745, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3429049253463745, "logits_per_char": -0.6714524626731873, "bits_per_byte": 0.968701138091985, "num_chars": 2}, {"sum_logits": -1.3216456174850464, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.3216456174850464, "logits_per_char": -0.6608228087425232, "bits_per_byte": 0.953365789079839, "num_chars": 2}, {"sum_logits": -1.3954561948776245, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3954561948776245, "logits_per_char": -0.6977280974388123, "bits_per_byte": 1.006608866064589, "num_chars": 2}, {"sum_logits": -1.5524595975875854, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.5524595975875854, "logits_per_char": -0.7762297987937927, "bits_per_byte": 1.119862881310847, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 295, "native_id": "TIMSS_2011_4_pg5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1467092037200928, "logits_per_token_corr": -1.1467092037200928, "logits_per_char_corr": -0.5733546018600464, "bits_per_byte_corr": 0.8271758407749492}, "model_output": [{"sum_logits": -1.5482003688812256, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.5482003688812256, "logits_per_char": -0.7741001844406128, "bits_per_byte": 1.1167904972445064, "num_chars": 2}, {"sum_logits": -1.1467092037200928, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.1467092037200928, "logits_per_char": -0.5733546018600464, "bits_per_byte": 0.8271758407749492, "num_chars": 2}, {"sum_logits": -1.1526086330413818, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.1526086330413818, "logits_per_char": -0.5763043165206909, "bits_per_byte": 0.8314313794879016, "num_chars": 2}, {"sum_logits": -1.9724996089935303, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.9724996089935303, "logits_per_char": -0.9862498044967651, "bits_per_byte": 1.422857702026215, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 296, "native_id": "Mercury_SC_406833", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5042681694030762, "logits_per_token_corr": -1.5042681694030762, "logits_per_char_corr": -0.7521340847015381, "bits_per_byte_corr": 1.0851001140832481}, "model_output": [{"sum_logits": -1.4104018211364746, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4104018211364746, "logits_per_char": -0.7052009105682373, "bits_per_byte": 1.0173898565079083, "num_chars": 2}, {"sum_logits": -0.9877867102622986, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -0.9877867102622986, "logits_per_char": -0.4938933551311493, "bits_per_byte": 0.7125374941762327, "num_chars": 2}, {"sum_logits": -1.5042681694030762, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.5042681694030762, "logits_per_char": -0.7521340847015381, "bits_per_byte": 1.0851001140832481, "num_chars": 2}, {"sum_logits": -1.9075942039489746, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.9075942039489746, "logits_per_char": -0.9537971019744873, "bits_per_byte": 1.3760383490338464, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 297, "native_id": "Mercury_7029558", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.296079158782959, "logits_per_token_corr": -1.296079158782959, "logits_per_char_corr": -0.6480395793914795, "bits_per_byte_corr": 0.9349234874885288}, "model_output": [{"sum_logits": -1.6245360374450684, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.6245360374450684, "logits_per_char": -0.8122680187225342, "bits_per_byte": 1.1718550424845458, "num_chars": 2}, {"sum_logits": -1.1446967124938965, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.1446967124938965, "logits_per_char": -0.5723483562469482, "bits_per_byte": 0.8257241352190152, "num_chars": 2}, {"sum_logits": -1.296079158782959, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.296079158782959, "logits_per_char": -0.6480395793914795, "bits_per_byte": 0.9349234874885288, "num_chars": 2}, {"sum_logits": -1.6425490379333496, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.6425490379333496, "logits_per_char": -0.8212745189666748, "bits_per_byte": 1.1848486757225418, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 298, "native_id": "Mercury_7138390", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0596530437469482, "logits_per_token_corr": -1.0596530437469482, "logits_per_char_corr": -0.5298265218734741, "bits_per_byte_corr": 0.7643780956388582}, "model_output": [{"sum_logits": -1.2600314617156982, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.2600314617156982, "logits_per_char": -0.6300157308578491, "bits_per_byte": 0.9089205705913078, "num_chars": 2}, {"sum_logits": -1.0596530437469482, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": true, "logits_per_token": -1.0596530437469482, "logits_per_char": -0.5298265218734741, "bits_per_byte": 0.7643780956388582, "num_chars": 2}, {"sum_logits": -1.5415847301483154, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.5415847301483154, "logits_per_char": -0.7707923650741577, "bits_per_byte": 1.1120183226483618, "num_chars": 2}, {"sum_logits": -1.9945456981658936, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.9945456981658936, "logits_per_char": -0.9972728490829468, "bits_per_byte": 1.4387605937862087, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 299, "native_id": "MEAP_2005_5_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.541535496711731, "logits_per_token_corr": -1.541535496711731, "logits_per_char_corr": -0.7707677483558655, "bits_per_byte_corr": 1.1119828082309584}, "model_output": [{"sum_logits": -1.541535496711731, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": false, "logits_per_token": -1.541535496711731, "logits_per_char": -0.7707677483558655, "bits_per_byte": 1.1119828082309584, "num_chars": 2}, {"sum_logits": -1.02100670337677, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": true, "logits_per_token": -1.02100670337677, "logits_per_char": -0.510503351688385, "bits_per_byte": 0.7365006538385566, "num_chars": 2}, {"sum_logits": -1.1844264268875122, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": false, "logits_per_token": -1.1844264268875122, "logits_per_char": -0.5922132134437561, "bits_per_byte": 0.854383066184838, "num_chars": 2}, {"sum_logits": -2.2310705184936523, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": false, "logits_per_token": -2.2310705184936523, "logits_per_char": -1.1155352592468262, "bits_per_byte": 1.6093771864533366, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 300, "native_id": "MCAS_2000_4_30", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5713679790496826, "logits_per_token_corr": -1.5713679790496826, "logits_per_char_corr": -0.7856839895248413, "bits_per_byte_corr": 1.1335023953941592}, "model_output": [{"sum_logits": -1.4991366863250732, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4991366863250732, "logits_per_char": -0.7495683431625366, "bits_per_byte": 1.0813985314887253, "num_chars": 2}, {"sum_logits": -1.177168607711792, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.177168607711792, "logits_per_char": -0.588584303855896, "bits_per_byte": 0.849147656318594, "num_chars": 2}, {"sum_logits": -1.4071643352508545, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4071643352508545, "logits_per_char": -0.7035821676254272, "bits_per_byte": 1.0150545040918406, "num_chars": 2}, {"sum_logits": -1.5713679790496826, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.5713679790496826, "logits_per_char": -0.7856839895248413, "bits_per_byte": 1.1335023953941592, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 301, "native_id": "MCAS_1998_4_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.339171290397644, "logits_per_token_corr": -1.339171290397644, "logits_per_char_corr": -0.669585645198822, "bits_per_byte_corr": 0.9660078897794715}, "model_output": [{"sum_logits": -1.406718134880066, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.406718134880066, "logits_per_char": -0.703359067440033, "bits_per_byte": 1.0147326385607505, "num_chars": 2}, {"sum_logits": -1.0727070569992065, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.0727070569992065, "logits_per_char": -0.5363535284996033, "bits_per_byte": 0.7737945757302309, "num_chars": 2}, {"sum_logits": -1.339171290397644, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.339171290397644, "logits_per_char": -0.669585645198822, "bits_per_byte": 0.9660078897794715, "num_chars": 2}, {"sum_logits": -1.9872177839279175, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.9872177839279175, "logits_per_char": -0.9936088919639587, "bits_per_byte": 1.4334746210206109, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 302, "native_id": "Mercury_175840", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.175546407699585, "logits_per_token_corr": -1.175546407699585, "logits_per_char_corr": -0.5877732038497925, "bits_per_byte_corr": 0.8479774863621226}, "model_output": [{"sum_logits": -1.2514874935150146, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.2514874935150146, "logits_per_char": -0.6257437467575073, "bits_per_byte": 0.9027574003149837, "num_chars": 2}, {"sum_logits": -1.175546407699585, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.175546407699585, "logits_per_char": -0.5877732038497925, "bits_per_byte": 0.8479774863621226, "num_chars": 2}, {"sum_logits": -1.359973669052124, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.359973669052124, "logits_per_char": -0.679986834526062, "bits_per_byte": 0.9810136340412386, "num_chars": 2}, {"sum_logits": -1.9809491634368896, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.9809491634368896, "logits_per_char": -0.9904745817184448, "bits_per_byte": 1.4289527671727973, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 303, "native_id": "Mercury_7099190", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3427149057388306, "logits_per_token_corr": -1.3427149057388306, "logits_per_char_corr": -0.6713574528694153, "bits_per_byte_corr": 0.9685640679192472}, "model_output": [{"sum_logits": -1.3458982706069946, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.3458982706069946, "logits_per_char": -0.6729491353034973, "bits_per_byte": 0.9708603802735691, "num_chars": 2}, {"sum_logits": -1.153795838356018, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.153795838356018, "logits_per_char": -0.576897919178009, "bits_per_byte": 0.8322877670978736, "num_chars": 2}, {"sum_logits": -1.3427149057388306, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.3427149057388306, "logits_per_char": -0.6713574528694153, "bits_per_byte": 0.9685640679192472, "num_chars": 2}, {"sum_logits": -1.8882452249526978, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.8882452249526978, "logits_per_char": -0.9441226124763489, "bits_per_byte": 1.3620810110117396, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 304, "native_id": "Mercury_SC_401605", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5182526111602783, "logits_per_token_corr": -1.5182526111602783, "logits_per_char_corr": -0.7591263055801392, "bits_per_byte_corr": 1.0951877564696135}, "model_output": [{"sum_logits": -1.5182526111602783, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.5182526111602783, "logits_per_char": -0.7591263055801392, "bits_per_byte": 1.0951877564696135, "num_chars": 2}, {"sum_logits": -1.0976612567901611, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.0976612567901611, "logits_per_char": -0.5488306283950806, "bits_per_byte": 0.7917952258741251, "num_chars": 2}, {"sum_logits": -1.4342987537384033, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4342987537384033, "logits_per_char": -0.7171493768692017, "bits_per_byte": 1.0346278495865509, "num_chars": 2}, {"sum_logits": -1.6443302631378174, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.6443302631378174, "logits_per_char": -0.8221651315689087, "bits_per_byte": 1.1861335581071388, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 305, "native_id": "TAKS_2009_5_36", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2989163398742676, "logits_per_token_corr": -1.2989163398742676, "logits_per_char_corr": -0.6494581699371338, "bits_per_byte_corr": 0.9369700810337978}, "model_output": [{"sum_logits": -1.2989163398742676, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.2989163398742676, "logits_per_char": -0.6494581699371338, "bits_per_byte": 0.9369700810337978, "num_chars": 2}, {"sum_logits": -1.1696534156799316, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.1696534156799316, "logits_per_char": -0.5848267078399658, "bits_per_byte": 0.8437265911807436, "num_chars": 2}, {"sum_logits": -1.37785005569458, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.37785005569458, "logits_per_char": -0.68892502784729, "bits_per_byte": 0.9939087212202904, "num_chars": 2}, {"sum_logits": -1.8976426124572754, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.8976426124572754, "logits_per_char": -0.9488213062286377, "bits_per_byte": 1.3688597931868276, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 306, "native_id": "Mercury_7171570", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5495119094848633, "logits_per_token_corr": -1.5495119094848633, "logits_per_char_corr": -0.7747559547424316, "bits_per_byte_corr": 1.1177365738069034}, "model_output": [{"sum_logits": -1.4690256118774414, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4690256118774414, "logits_per_char": -0.7345128059387207, "bits_per_byte": 1.0596779825979912, "num_chars": 2}, {"sum_logits": -1.1954336166381836, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.1954336166381836, "logits_per_char": -0.5977168083190918, "bits_per_byte": 0.8623230752185523, "num_chars": 2}, {"sum_logits": -1.4462270736694336, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4462270736694336, "logits_per_char": -0.7231135368347168, "bits_per_byte": 1.0432323135918742, "num_chars": 2}, {"sum_logits": -1.5495119094848633, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.5495119094848633, "logits_per_char": -0.7747559547424316, "bits_per_byte": 1.1177365738069034, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 307, "native_id": "Mercury_SC_402057", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2185454368591309, "logits_per_token_corr": -1.2185454368591309, "logits_per_char_corr": -0.6092727184295654, "bits_per_byte_corr": 0.8789947294279032}, "model_output": [{"sum_logits": -1.6836810111999512, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.6836810111999512, "logits_per_char": -0.8418405055999756, "bits_per_byte": 1.214519122649415, "num_chars": 2}, {"sum_logits": -1.255509853363037, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.255509853363037, "logits_per_char": -0.6277549266815186, "bits_per_byte": 0.9056589196176923, "num_chars": 2}, {"sum_logits": -1.2185454368591309, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2185454368591309, "logits_per_char": -0.6092727184295654, "bits_per_byte": 0.8789947294279032, "num_chars": 2}, {"sum_logits": -1.5058627128601074, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5058627128601074, "logits_per_char": -0.7529313564300537, "bits_per_byte": 1.0862503340522194, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 308, "native_id": "Mercury_SC_413628", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5194405317306519, "logits_per_token_corr": -1.5194405317306519, "logits_per_char_corr": -0.7597202658653259, "bits_per_byte_corr": 1.096044660027538}, "model_output": [{"sum_logits": -1.2498196363449097, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.2498196363449097, "logits_per_char": -0.6249098181724548, "bits_per_byte": 0.9015542956808721, "num_chars": 2}, {"sum_logits": -1.1121548414230347, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.1121548414230347, "logits_per_char": -0.5560774207115173, "bits_per_byte": 0.8022501372114081, "num_chars": 2}, {"sum_logits": -1.5194405317306519, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.5194405317306519, "logits_per_char": -0.7597202658653259, "bits_per_byte": 1.096044660027538, "num_chars": 2}, {"sum_logits": -1.9009751081466675, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.9009751081466675, "logits_per_char": -0.9504875540733337, "bits_per_byte": 1.3712636806892642, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 309, "native_id": "Mercury_LBS10131", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6556861400604248, "logits_per_token_corr": -1.6556861400604248, "logits_per_char_corr": -0.8278430700302124, "bits_per_byte_corr": 1.1943250917677404}, "model_output": [{"sum_logits": -1.438765287399292, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.438765287399292, "logits_per_char": -0.719382643699646, "bits_per_byte": 1.037849772567817, "num_chars": 2}, {"sum_logits": -1.1027324199676514, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.1027324199676514, "logits_per_char": -0.5513662099838257, "bits_per_byte": 0.7954532968579796, "num_chars": 2}, {"sum_logits": -1.6556861400604248, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.6556861400604248, "logits_per_char": -0.8278430700302124, "bits_per_byte": 1.1943250917677404, "num_chars": 2}, {"sum_logits": -1.5203197002410889, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.5203197002410889, "logits_per_char": -0.7601598501205444, "bits_per_byte": 1.0966788460525951, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 310, "native_id": "Mercury_7032428", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2986057996749878, "logits_per_token_corr": -1.2986057996749878, "logits_per_char_corr": -0.6493028998374939, "bits_per_byte_corr": 0.9367460736310488}, "model_output": [{"sum_logits": -1.2780731916427612, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": true, "logits_per_token": -1.2780731916427612, "logits_per_char": -0.6390365958213806, "bits_per_byte": 0.9219349277387331, "num_chars": 2}, {"sum_logits": -1.2986057996749878, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.2986057996749878, "logits_per_char": -0.6493028998374939, "bits_per_byte": 0.9367460736310488, "num_chars": 2}, {"sum_logits": -1.2966927289962769, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.2966927289962769, "logits_per_char": -0.6483463644981384, "bits_per_byte": 0.9353660848405246, "num_chars": 2}, {"sum_logits": -1.8758095502853394, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.8758095502853394, "logits_per_char": -0.9379047751426697, "bits_per_byte": 1.35311056792538, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 311, "native_id": "Mercury_7025008", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.8247469663619995, "logits_per_token_corr": -0.8247469663619995, "logits_per_char_corr": -0.41237348318099976, "bits_per_byte_corr": 0.5949291791797642}, "model_output": [{"sum_logits": -0.8247469663619995, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -0.8247469663619995, "logits_per_char": -0.41237348318099976, "bits_per_byte": 0.5949291791797642, "num_chars": 2}, {"sum_logits": -1.2320517301559448, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.2320517301559448, "logits_per_char": -0.6160258650779724, "bits_per_byte": 0.888737460607963, "num_chars": 2}, {"sum_logits": -1.7745360136032104, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.7745360136032104, "logits_per_char": -0.8872680068016052, "bits_per_byte": 1.2800571533530305, "num_chars": 2}, {"sum_logits": -2.3972015380859375, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -2.3972015380859375, "logits_per_char": -1.1986007690429688, "bits_per_byte": 1.729215385505231, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 312, "native_id": "MEA_2011_8_19", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6491856575012207, "logits_per_token_corr": -1.6491856575012207, "logits_per_char_corr": -0.8245928287506104, "bits_per_byte_corr": 1.1896359847919624}, "model_output": [{"sum_logits": -1.343456745147705, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.343456745147705, "logits_per_char": -0.6717283725738525, "bits_per_byte": 0.9690991919374072, "num_chars": 2}, {"sum_logits": -1.2225651741027832, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.2225651741027832, "logits_per_char": -0.6112825870513916, "bits_per_byte": 0.8818943569214523, "num_chars": 2}, {"sum_logits": -1.4738659858703613, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4738659858703613, "logits_per_char": -0.7369329929351807, "bits_per_byte": 1.0631695743758105, "num_chars": 2}, {"sum_logits": -1.6491856575012207, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.6491856575012207, "logits_per_char": -0.8245928287506104, "bits_per_byte": 1.1896359847919624, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 313, "native_id": "NYSEDREGENTS_2008_8_27", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3614126443862915, "logits_per_token_corr": -1.3614126443862915, "logits_per_char_corr": -0.6807063221931458, "bits_per_byte_corr": 0.9820516353305219}, "model_output": [{"sum_logits": -1.4308172464370728, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4308172464370728, "logits_per_char": -0.7154086232185364, "bits_per_byte": 1.032116472927325, "num_chars": 2}, {"sum_logits": -1.6995741128921509, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.6995741128921509, "logits_per_char": -0.8497870564460754, "bits_per_byte": 1.2259835721472634, "num_chars": 2}, {"sum_logits": -1.3614126443862915, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.3614126443862915, "logits_per_char": -0.6807063221931458, "bits_per_byte": 0.9820516353305219, "num_chars": 2}, {"sum_logits": -1.4216142892837524, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4216142892837524, "logits_per_char": -0.7108071446418762, "bits_per_byte": 1.0254779426040157, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 314, "native_id": "VASoL_2007_5_22", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.142256736755371, "logits_per_token_corr": -1.142256736755371, "logits_per_char_corr": -0.5711283683776855, "bits_per_byte_corr": 0.823964064770084}, "model_output": [{"sum_logits": -1.372267723083496, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.372267723083496, "logits_per_char": -0.686133861541748, "bits_per_byte": 0.9898819194329858, "num_chars": 2}, {"sum_logits": -1.142256736755371, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.142256736755371, "logits_per_char": -0.5711283683776855, "bits_per_byte": 0.823964064770084, "num_chars": 2}, {"sum_logits": -1.291346549987793, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.291346549987793, "logits_per_char": -0.6456732749938965, "bits_per_byte": 0.9315096318688997, "num_chars": 2}, {"sum_logits": -1.9722089767456055, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.9722089767456055, "logits_per_char": -0.9861044883728027, "bits_per_byte": 1.422648055174813, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 315, "native_id": "NCEOGA_2013_5_19", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6119639873504639, "logits_per_token_corr": -1.6119639873504639, "logits_per_char_corr": -0.8059819936752319, "bits_per_byte_corr": 1.1627862253218924}, "model_output": [{"sum_logits": -1.6119639873504639, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.6119639873504639, "logits_per_char": -0.8059819936752319, "bits_per_byte": 1.1627862253218924, "num_chars": 2}, {"sum_logits": -1.1489112377166748, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.1489112377166748, "logits_per_char": -0.5744556188583374, "bits_per_byte": 0.8287642725383193, "num_chars": 2}, {"sum_logits": -1.213097333908081, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.213097333908081, "logits_per_char": -0.6065486669540405, "bits_per_byte": 0.8750647538730344, "num_chars": 2}, {"sum_logits": -1.7394607067108154, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.7394607067108154, "logits_per_char": -0.8697303533554077, "bits_per_byte": 1.254755667697354, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 316, "native_id": "Mercury_7037555", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5006214380264282, "logits_per_token_corr": -1.5006214380264282, "logits_per_char_corr": -0.7503107190132141, "bits_per_byte_corr": 1.0824695534469742}, "model_output": [{"sum_logits": -1.1474395990371704, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.1474395990371704, "logits_per_char": -0.5737197995185852, "bits_per_byte": 0.8277027096258678, "num_chars": 2}, {"sum_logits": -1.2152000665664673, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.2152000665664673, "logits_per_char": -0.6076000332832336, "bits_per_byte": 0.87658155486232, "num_chars": 2}, {"sum_logits": -1.5006214380264282, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.5006214380264282, "logits_per_char": -0.7503107190132141, "bits_per_byte": 1.0824695534469742, "num_chars": 2}, {"sum_logits": -1.9813305139541626, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.9813305139541626, "logits_per_char": -0.9906652569770813, "bits_per_byte": 1.4292278534228526, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 317, "native_id": "Mercury_402132", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1605019569396973, "logits_per_token_corr": -1.1605019569396973, "logits_per_char_corr": -0.5802509784698486, "bits_per_byte_corr": 0.8371252091100208}, "model_output": [{"sum_logits": -1.3179936408996582, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.3179936408996582, "logits_per_char": -0.6589968204498291, "bits_per_byte": 0.9507314448252461, "num_chars": 2}, {"sum_logits": -1.1605019569396973, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.1605019569396973, "logits_per_char": -0.5802509784698486, "bits_per_byte": 0.8371252091100208, "num_chars": 2}, {"sum_logits": -1.4067540168762207, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4067540168762207, "logits_per_char": -0.7033770084381104, "bits_per_byte": 1.0147585219497055, "num_chars": 2}, {"sum_logits": -1.84244966506958, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.84244966506958, "logits_per_char": -0.92122483253479, "bits_per_byte": 1.3290464974426621, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 318, "native_id": "MCAS_2006_8_24", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3460594415664673, "logits_per_token_corr": -1.3460594415664673, "logits_per_char_corr": -0.6730297207832336, "bits_per_byte_corr": 0.9709766405455524}, "model_output": [{"sum_logits": -1.3460594415664673, "num_tokens": 1, "num_tokens_all": 436, "is_greedy": false, "logits_per_token": -1.3460594415664673, "logits_per_char": -0.6730297207832336, "bits_per_byte": 0.9709766405455524, "num_chars": 2}, {"sum_logits": -1.138257622718811, "num_tokens": 1, "num_tokens_all": 436, "is_greedy": true, "logits_per_token": -1.138257622718811, "logits_per_char": -0.5691288113594055, "bits_per_byte": 0.8210793137758345, "num_chars": 2}, {"sum_logits": -1.2972770929336548, "num_tokens": 1, "num_tokens_all": 436, "is_greedy": false, "logits_per_token": -1.2972770929336548, "logits_per_char": -0.6486385464668274, "bits_per_byte": 0.9357876143177897, "num_chars": 2}, {"sum_logits": -2.0109782218933105, "num_tokens": 1, "num_tokens_all": 436, "is_greedy": false, "logits_per_token": -2.0109782218933105, "logits_per_char": -1.0054891109466553, "bits_per_byte": 1.4506141540316344, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 319, "native_id": "Mercury_7128923", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.353547215461731, "logits_per_token_corr": -1.353547215461731, "logits_per_char_corr": -0.6767736077308655, "bits_per_byte_corr": 0.9763779276785537}, "model_output": [{"sum_logits": -1.479359745979309, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.479359745979309, "logits_per_char": -0.7396798729896545, "bits_per_byte": 1.0671324846083197, "num_chars": 2}, {"sum_logits": -1.353547215461731, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.353547215461731, "logits_per_char": -0.6767736077308655, "bits_per_byte": 0.9763779276785537, "num_chars": 2}, {"sum_logits": -1.2603274583816528, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.2603274583816528, "logits_per_char": -0.6301637291908264, "bits_per_byte": 0.9091340870523541, "num_chars": 2}, {"sum_logits": -1.5552855730056763, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.5552855730056763, "logits_per_char": -0.7776427865028381, "bits_per_byte": 1.1219013916715255, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 320, "native_id": "Mercury_416379", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5941466093063354, "logits_per_token_corr": -1.5941466093063354, "logits_per_char_corr": -0.7970733046531677, "bits_per_byte_corr": 1.1499337038489292}, "model_output": [{"sum_logits": -1.5941466093063354, "num_tokens": 1, "num_tokens_all": 432, "is_greedy": false, "logits_per_token": -1.5941466093063354, "logits_per_char": -0.7970733046531677, "bits_per_byte": 1.1499337038489292, "num_chars": 2}, {"sum_logits": -1.1257742643356323, "num_tokens": 1, "num_tokens_all": 432, "is_greedy": true, "logits_per_token": -1.1257742643356323, "logits_per_char": -0.5628871321678162, "bits_per_byte": 0.8120744741593023, "num_chars": 2}, {"sum_logits": -1.3278692960739136, "num_tokens": 1, "num_tokens_all": 432, "is_greedy": false, "logits_per_token": -1.3278692960739136, "logits_per_char": -0.6639346480369568, "bits_per_byte": 0.9578552241979651, "num_chars": 2}, {"sum_logits": -1.708368182182312, "num_tokens": 1, "num_tokens_all": 432, "is_greedy": false, "logits_per_token": -1.708368182182312, "logits_per_char": -0.854184091091156, "bits_per_byte": 1.2323271522243426, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 321, "native_id": "Mercury_7168053", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4930377006530762, "logits_per_token_corr": -1.4930377006530762, "logits_per_char_corr": -0.7465188503265381, "bits_per_byte_corr": 1.0769990432970005}, "model_output": [{"sum_logits": -1.4784636497497559, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4784636497497559, "logits_per_char": -0.7392318248748779, "bits_per_byte": 1.0664860878150513, "num_chars": 2}, {"sum_logits": -1.2095046043395996, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.2095046043395996, "logits_per_char": -0.6047523021697998, "bits_per_byte": 0.872473147307181, "num_chars": 2}, {"sum_logits": -1.4930377006530762, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4930377006530762, "logits_per_char": -0.7465188503265381, "bits_per_byte": 1.0769990432970005, "num_chars": 2}, {"sum_logits": -1.4423174858093262, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4423174858093262, "logits_per_char": -0.7211587429046631, "bits_per_byte": 1.040412142083024, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 322, "native_id": "AKDE&ED_2008_8_1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5575095415115356, "logits_per_token_corr": -1.5575095415115356, "logits_per_char_corr": -0.7787547707557678, "bits_per_byte_corr": 1.1235056458387749}, "model_output": [{"sum_logits": -1.5575095415115356, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.5575095415115356, "logits_per_char": -0.7787547707557678, "bits_per_byte": 1.1235056458387749, "num_chars": 2}, {"sum_logits": -1.25879967212677, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.25879967212677, "logits_per_char": -0.629399836063385, "bits_per_byte": 0.9080320222256245, "num_chars": 2}, {"sum_logits": -1.158467411994934, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.158467411994934, "logits_per_char": -0.579233705997467, "bits_per_byte": 0.8356575951588819, "num_chars": 2}, {"sum_logits": -1.7216178178787231, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.7216178178787231, "logits_per_char": -0.8608089089393616, "bits_per_byte": 1.2418847440807483, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 323, "native_id": "Mercury_SC_415476", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1083723306655884, "logits_per_token_corr": -1.1083723306655884, "logits_per_char_corr": -0.5541861653327942, "bits_per_byte_corr": 0.7995216324554677}, "model_output": [{"sum_logits": -1.4698930978775024, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4698930978775024, "logits_per_char": -0.7349465489387512, "bits_per_byte": 1.060303741473156, "num_chars": 2}, {"sum_logits": -1.1083723306655884, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.1083723306655884, "logits_per_char": -0.5541861653327942, "bits_per_byte": 0.7995216324554677, "num_chars": 2}, {"sum_logits": -1.309517502784729, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.309517502784729, "logits_per_char": -0.6547587513923645, "bits_per_byte": 0.9446172036130925, "num_chars": 2}, {"sum_logits": -1.8358923196792603, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.8358923196792603, "logits_per_char": -0.9179461598396301, "bits_per_byte": 1.3243163726046536, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 324, "native_id": "Mercury_7106960", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.434859275817871, "logits_per_token_corr": -1.434859275817871, "logits_per_char_corr": -0.7174296379089355, "bits_per_byte_corr": 1.0350321807987295}, "model_output": [{"sum_logits": -1.434859275817871, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.434859275817871, "logits_per_char": -0.7174296379089355, "bits_per_byte": 1.0350321807987295, "num_chars": 2}, {"sum_logits": -1.241602897644043, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.241602897644043, "logits_per_char": -0.6208014488220215, "bits_per_byte": 0.8956271715928574, "num_chars": 2}, {"sum_logits": -1.316117286682129, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.316117286682129, "logits_per_char": -0.6580586433410645, "bits_per_byte": 0.9493779413629548, "num_chars": 2}, {"sum_logits": -1.7000398635864258, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.7000398635864258, "logits_per_char": -0.8500199317932129, "bits_per_byte": 1.226319540255724, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 325, "native_id": "Mercury_7160563", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4108301401138306, "logits_per_token_corr": -1.4108301401138306, "logits_per_char_corr": -0.7054150700569153, "bits_per_byte_corr": 1.0176988233401836}, "model_output": [{"sum_logits": -1.663756012916565, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.663756012916565, "logits_per_char": -0.8318780064582825, "bits_per_byte": 1.2001462745428235, "num_chars": 2}, {"sum_logits": -0.9030119180679321, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": true, "logits_per_token": -0.9030119180679321, "logits_per_char": -0.45150595903396606, "bits_per_byte": 0.6513854080305863, "num_chars": 2}, {"sum_logits": -1.4108301401138306, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.4108301401138306, "logits_per_char": -0.7054150700569153, "bits_per_byte": 1.0176988233401836, "num_chars": 2}, {"sum_logits": -1.8916860818862915, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.8916860818862915, "logits_per_char": -0.9458430409431458, "bits_per_byte": 1.3645630646289935, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 326, "native_id": "Mercury_7068583", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2202144861221313, "logits_per_token_corr": -1.2202144861221313, "logits_per_char_corr": -0.6101072430610657, "bits_per_byte_corr": 0.8801986939752693}, "model_output": [{"sum_logits": -1.3258663415908813, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3258663415908813, "logits_per_char": -0.6629331707954407, "bits_per_byte": 0.9564103979480656, "num_chars": 2}, {"sum_logits": -1.2202144861221313, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.2202144861221313, "logits_per_char": -0.6101072430610657, "bits_per_byte": 0.8801986939752693, "num_chars": 2}, {"sum_logits": -1.3064457178115845, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3064457178115845, "logits_per_char": -0.6532228589057922, "bits_per_byte": 0.9424013791393746, "num_chars": 2}, {"sum_logits": -1.85198175907135, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.85198175907135, "logits_per_char": -0.925990879535675, "bits_per_byte": 1.3359224498154876, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 327, "native_id": "Mercury_404638", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.0235483646392822, "logits_per_token_corr": -2.0235483646392822, "logits_per_char_corr": -1.0117741823196411, "bits_per_byte_corr": 1.4596815953330808}, "model_output": [{"sum_logits": -1.5790655612945557, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5790655612945557, "logits_per_char": -0.7895327806472778, "bits_per_byte": 1.1390550272599198, "num_chars": 2}, {"sum_logits": -0.9791433811187744, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -0.9791433811187744, "logits_per_char": -0.4895716905593872, "bits_per_byte": 0.7063026501301615, "num_chars": 2}, {"sum_logits": -1.345461130142212, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.345461130142212, "logits_per_char": -0.672730565071106, "bits_per_byte": 0.9705450500832119, "num_chars": 2}, {"sum_logits": -2.0235483646392822, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -2.0235483646392822, "logits_per_char": -1.0117741823196411, "bits_per_byte": 1.4596815953330808, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 328, "native_id": "Mercury_SC_407138", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0814751386642456, "logits_per_token_corr": -1.0814751386642456, "logits_per_char_corr": -0.5407375693321228, "bits_per_byte_corr": 0.780119409698366}, "model_output": [{"sum_logits": -1.0814751386642456, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.0814751386642456, "logits_per_char": -0.5407375693321228, "bits_per_byte": 0.780119409698366, "num_chars": 2}, {"sum_logits": -1.2082871198654175, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.2082871198654175, "logits_per_char": -0.6041435599327087, "bits_per_byte": 0.8715949179005493, "num_chars": 2}, {"sum_logits": -1.467570185661316, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.467570185661316, "logits_per_char": -0.733785092830658, "bits_per_byte": 1.0586281145057985, "num_chars": 2}, {"sum_logits": -2.1998777389526367, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -2.1998777389526367, "logits_per_char": -1.0999388694763184, "bits_per_byte": 1.5868763522756373, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 329, "native_id": "MCAS_2000_4_10", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.437066674232483, "logits_per_token_corr": -1.437066674232483, "logits_per_char_corr": -0.7185333371162415, "bits_per_byte_corr": 1.036624482171744}, "model_output": [{"sum_logits": -1.5226188898086548, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5226188898086548, "logits_per_char": -0.7613094449043274, "bits_per_byte": 1.0983373607461917, "num_chars": 2}, {"sum_logits": -1.1856170892715454, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.1856170892715454, "logits_per_char": -0.5928085446357727, "bits_per_byte": 0.8552419475432474, "num_chars": 2}, {"sum_logits": -1.5241905450820923, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5241905450820923, "logits_per_char": -0.7620952725410461, "bits_per_byte": 1.0994710703806803, "num_chars": 2}, {"sum_logits": -1.437066674232483, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.437066674232483, "logits_per_char": -0.7185333371162415, "bits_per_byte": 1.036624482171744, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 330, "native_id": "Mercury_177748", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9453730583190918, "logits_per_token_corr": -1.9453730583190918, "logits_per_char_corr": -0.9726865291595459, "bits_per_byte_corr": 1.4032900319589832}, "model_output": [{"sum_logits": -1.524817943572998, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.524817943572998, "logits_per_char": -0.762408971786499, "bits_per_byte": 1.099923642726426, "num_chars": 2}, {"sum_logits": -0.9073024392127991, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -0.9073024392127991, "logits_per_char": -0.45365121960639954, "bits_per_byte": 0.6544803648198529, "num_chars": 2}, {"sum_logits": -1.5462241172790527, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.5462241172790527, "logits_per_char": -0.7731120586395264, "bits_per_byte": 1.1153649330515034, "num_chars": 2}, {"sum_logits": -1.9453730583190918, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.9453730583190918, "logits_per_char": -0.9726865291595459, "bits_per_byte": 1.4032900319589832, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 331, "native_id": "MCAS_2004_9_21-v1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2457977533340454, "logits_per_token_corr": -1.2457977533340454, "logits_per_char_corr": -0.6228988766670227, "bits_per_byte_corr": 0.8986531203434653}, "model_output": [{"sum_logits": -1.4495998620986938, "num_tokens": 1, "num_tokens_all": 432, "is_greedy": false, "logits_per_token": -1.4495998620986938, "logits_per_char": -0.7247999310493469, "bits_per_byte": 1.0456652661623067, "num_chars": 2}, {"sum_logits": -1.1484466791152954, "num_tokens": 1, "num_tokens_all": 432, "is_greedy": true, "logits_per_token": -1.1484466791152954, "logits_per_char": -0.5742233395576477, "bits_per_byte": 0.8284291643431129, "num_chars": 2}, {"sum_logits": -1.2457977533340454, "num_tokens": 1, "num_tokens_all": 432, "is_greedy": false, "logits_per_token": -1.2457977533340454, "logits_per_char": -0.6228988766670227, "bits_per_byte": 0.8986531203434653, "num_chars": 2}, {"sum_logits": -1.9174495935440063, "num_tokens": 1, "num_tokens_all": 432, "is_greedy": false, "logits_per_token": -1.9174495935440063, "logits_per_char": -0.9587247967720032, "bits_per_byte": 1.383147509881242, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 332, "native_id": "MDSA_2007_5_16", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5310120582580566, "logits_per_token_corr": -1.5310120582580566, "logits_per_char_corr": -0.7655060291290283, "bits_per_byte_corr": 1.104391751995845}, "model_output": [{"sum_logits": -1.5310120582580566, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.5310120582580566, "logits_per_char": -0.7655060291290283, "bits_per_byte": 1.104391751995845, "num_chars": 2}, {"sum_logits": -1.2419381141662598, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.2419381141662598, "logits_per_char": -0.6209690570831299, "bits_per_byte": 0.8958689791999708, "num_chars": 2}, {"sum_logits": -1.2817902565002441, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.2817902565002441, "logits_per_char": -0.6408951282501221, "bits_per_byte": 0.9246162232570115, "num_chars": 2}, {"sum_logits": -1.6095776557922363, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.6095776557922363, "logits_per_char": -0.8047888278961182, "bits_per_byte": 1.1610648509694053, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 333, "native_id": "Mercury_401763", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1846985816955566, "logits_per_token_corr": -1.1846985816955566, "logits_per_char_corr": -0.5923492908477783, "bits_per_byte_corr": 0.854579384380798}, "model_output": [{"sum_logits": -1.7537693977355957, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.7537693977355957, "logits_per_char": -0.8768846988677979, "bits_per_byte": 1.2650772064888929, "num_chars": 2}, {"sum_logits": -1.1846985816955566, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": true, "logits_per_token": -1.1846985816955566, "logits_per_char": -0.5923492908477783, "bits_per_byte": 0.854579384380798, "num_chars": 2}, {"sum_logits": -1.3131356239318848, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.3131356239318848, "logits_per_char": -0.6565678119659424, "bits_per_byte": 0.947227126331263, "num_chars": 2}, {"sum_logits": -1.4726319313049316, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.4726319313049316, "logits_per_char": -0.7363159656524658, "bits_per_byte": 1.0622793921749438, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 334, "native_id": "Mercury_7268118", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3966083526611328, "logits_per_token_corr": -1.3966083526611328, "logits_per_char_corr": -0.6983041763305664, "bits_per_byte_corr": 1.0074399722248841}, "model_output": [{"sum_logits": -1.3784847259521484, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3784847259521484, "logits_per_char": -0.6892423629760742, "bits_per_byte": 0.9943665390368875, "num_chars": 2}, {"sum_logits": -1.0445327758789062, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.0445327758789062, "logits_per_char": -0.5222663879394531, "bits_per_byte": 0.7534711279037819, "num_chars": 2}, {"sum_logits": -1.3966083526611328, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3966083526611328, "logits_per_char": -0.6983041763305664, "bits_per_byte": 1.0074399722248841, "num_chars": 2}, {"sum_logits": -1.993612289428711, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.993612289428711, "logits_per_char": -0.9968061447143555, "bits_per_byte": 1.4380872817080803, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 335, "native_id": "Mercury_403232", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.3464860916137695, "logits_per_token_corr": -2.3464860916137695, "logits_per_char_corr": -1.1732430458068848, "bits_per_byte_corr": 1.6926319239442715}, "model_output": [{"sum_logits": -0.9042844772338867, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -0.9042844772338867, "logits_per_char": -0.45214223861694336, "bits_per_byte": 0.6523033654295672, "num_chars": 2}, {"sum_logits": -1.2989511489868164, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.2989511489868164, "logits_per_char": -0.6494755744934082, "bits_per_byte": 0.9369951905008237, "num_chars": 2}, {"sum_logits": -1.5895318984985352, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.5895318984985352, "logits_per_char": -0.7947659492492676, "bits_per_byte": 1.1466048936501516, "num_chars": 2}, {"sum_logits": -2.3464860916137695, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -2.3464860916137695, "logits_per_char": -1.1732430458068848, "bits_per_byte": 1.6926319239442715, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 336, "native_id": "Mercury_415081", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5897841453552246, "logits_per_token_corr": -1.5897841453552246, "logits_per_char_corr": -0.7948920726776123, "bits_per_byte_corr": 1.1467868512947645}, "model_output": [{"sum_logits": -1.5897841453552246, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.5897841453552246, "logits_per_char": -0.7948920726776123, "bits_per_byte": 1.1467868512947645, "num_chars": 2}, {"sum_logits": -1.679725170135498, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.679725170135498, "logits_per_char": -0.839862585067749, "bits_per_byte": 1.2116655865062973, "num_chars": 2}, {"sum_logits": -1.2651476860046387, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.2651476860046387, "logits_per_char": -0.6325738430023193, "bits_per_byte": 0.9126111462961755, "num_chars": 2}, {"sum_logits": -1.2780566215515137, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.2780566215515137, "logits_per_char": -0.6390283107757568, "bits_per_byte": 0.921922974944498, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 337, "native_id": "Mercury_7206378", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1248984336853027, "logits_per_token_corr": -1.1248984336853027, "logits_per_char_corr": -0.5624492168426514, "bits_per_byte_corr": 0.8114426958913573}, "model_output": [{"sum_logits": -1.5186915397644043, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.5186915397644043, "logits_per_char": -0.7593457698822021, "bits_per_byte": 1.095504376529852, "num_chars": 2}, {"sum_logits": -1.1248984336853027, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.1248984336853027, "logits_per_char": -0.5624492168426514, "bits_per_byte": 0.8114426958913573, "num_chars": 2}, {"sum_logits": -1.241544246673584, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.241544246673584, "logits_per_char": -0.620772123336792, "bits_per_byte": 0.8955848638607452, "num_chars": 2}, {"sum_logits": -1.86214017868042, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.86214017868042, "logits_per_char": -0.93107008934021, "bits_per_byte": 1.34325020061213, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 338, "native_id": "CSZ30169", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.813858985900879, "logits_per_token_corr": -1.813858985900879, "logits_per_char_corr": -0.9069294929504395, "bits_per_byte_corr": 1.308422681916481}, "model_output": [{"sum_logits": -1.813858985900879, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.813858985900879, "logits_per_char": -0.9069294929504395, "bits_per_byte": 1.308422681916481, "num_chars": 2}, {"sum_logits": -1.1548986434936523, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.1548986434936523, "logits_per_char": -0.5774493217468262, "bits_per_byte": 0.8330832728494401, "num_chars": 2}, {"sum_logits": -1.206324577331543, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.206324577331543, "logits_per_char": -0.6031622886657715, "bits_per_byte": 0.8701792427099712, "num_chars": 2}, {"sum_logits": -1.5725584030151367, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.5725584030151367, "logits_per_char": -0.7862792015075684, "bits_per_byte": 1.1343611047699178, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 339, "native_id": "Mercury_7013948", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5781675577163696, "logits_per_token_corr": -1.5781675577163696, "logits_per_char_corr": -0.7890837788581848, "bits_per_byte_corr": 1.1384072546054447}, "model_output": [{"sum_logits": -1.5605207681655884, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.5605207681655884, "logits_per_char": -0.7802603840827942, "bits_per_byte": 1.1256777867191738, "num_chars": 2}, {"sum_logits": -1.1713167428970337, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.1713167428970337, "logits_per_char": -0.5856583714485168, "bits_per_byte": 0.8449264281444887, "num_chars": 2}, {"sum_logits": -1.3556028604507446, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.3556028604507446, "logits_per_char": -0.6778014302253723, "bits_per_byte": 0.977860762094294, "num_chars": 2}, {"sum_logits": -1.5781675577163696, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.5781675577163696, "logits_per_char": -0.7890837788581848, "bits_per_byte": 1.1384072546054447, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 340, "native_id": "Mercury_SC_402164", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.276232123374939, "logits_per_token_corr": -1.276232123374939, "logits_per_char_corr": -0.6381160616874695, "bits_per_byte_corr": 0.9206068777087695}, "model_output": [{"sum_logits": -1.3789066076278687, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3789066076278687, "logits_per_char": -0.6894533038139343, "bits_per_byte": 0.9946708623375895, "num_chars": 2}, {"sum_logits": -1.1115573644638062, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.1115573644638062, "logits_per_char": -0.5557786822319031, "bits_per_byte": 0.8018191486883457, "num_chars": 2}, {"sum_logits": -1.276232123374939, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.276232123374939, "logits_per_char": -0.6381160616874695, "bits_per_byte": 0.9206068777087695, "num_chars": 2}, {"sum_logits": -2.1048874855041504, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -2.1048874855041504, "logits_per_char": -1.0524437427520752, "bits_per_byte": 1.5183553684841296, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 341, "native_id": "Mercury_400880", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.217333197593689, "logits_per_token_corr": -1.217333197593689, "logits_per_char_corr": -0.6086665987968445, "bits_per_byte_corr": 0.8781202836395907}, "model_output": [{"sum_logits": -1.5786556005477905, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.5786556005477905, "logits_per_char": -0.7893278002738953, "bits_per_byte": 1.138759303091761, "num_chars": 2}, {"sum_logits": -1.5427154302597046, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.5427154302597046, "logits_per_char": -0.7713577151298523, "bits_per_byte": 1.112833950370079, "num_chars": 2}, {"sum_logits": -1.217333197593689, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.217333197593689, "logits_per_char": -0.6086665987968445, "bits_per_byte": 0.8781202836395907, "num_chars": 2}, {"sum_logits": -1.4771922826766968, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4771922826766968, "logits_per_char": -0.7385961413383484, "bits_per_byte": 1.0655689903293246, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 342, "native_id": "Mercury_7040793", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2784113883972168, "logits_per_token_corr": -1.2784113883972168, "logits_per_char_corr": -0.6392056941986084, "bits_per_byte_corr": 0.9221788851289822}, "model_output": [{"sum_logits": -1.3947501182556152, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3947501182556152, "logits_per_char": -0.6973750591278076, "bits_per_byte": 1.0060995394440586, "num_chars": 2}, {"sum_logits": -1.2784113883972168, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.2784113883972168, "logits_per_char": -0.6392056941986084, "bits_per_byte": 0.9221788851289822, "num_chars": 2}, {"sum_logits": -1.3781428337097168, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3781428337097168, "logits_per_char": -0.6890714168548584, "bits_per_byte": 0.99411991591555, "num_chars": 2}, {"sum_logits": -1.5650172233581543, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.5650172233581543, "logits_per_char": -0.7825086116790771, "bits_per_byte": 1.1289212935231232, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 343, "native_id": "MDSA_2010_5_29", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6607693433761597, "logits_per_token_corr": -1.6607693433761597, "logits_per_char_corr": -0.8303846716880798, "bits_per_byte_corr": 1.1979918478754634}, "model_output": [{"sum_logits": -1.3953617811203003, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.3953617811203003, "logits_per_char": -0.6976808905601501, "bits_per_byte": 1.0065407609348473, "num_chars": 2}, {"sum_logits": -1.245198130607605, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": true, "logits_per_token": -1.245198130607605, "logits_per_char": -0.6225990653038025, "bits_per_byte": 0.898220583976545, "num_chars": 2}, {"sum_logits": -1.3363007307052612, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.3363007307052612, "logits_per_char": -0.6681503653526306, "bits_per_byte": 0.9639372186630818, "num_chars": 2}, {"sum_logits": -1.6607693433761597, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.6607693433761597, "logits_per_char": -0.8303846716880798, "bits_per_byte": 1.1979918478754634, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 344, "native_id": "LEAP__8_10365", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5913097858428955, "logits_per_token_corr": -1.5913097858428955, "logits_per_char_corr": -0.7956548929214478, "bits_per_byte_corr": 1.1478873682776365}, "model_output": [{"sum_logits": -1.5913097858428955, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.5913097858428955, "logits_per_char": -0.7956548929214478, "bits_per_byte": 1.1478873682776365, "num_chars": 2}, {"sum_logits": -1.0696823596954346, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": true, "logits_per_token": -1.0696823596954346, "logits_per_char": -0.5348411798477173, "bits_per_byte": 0.7716127178300582, "num_chars": 2}, {"sum_logits": -1.4582302570343018, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.4582302570343018, "logits_per_char": -0.7291151285171509, "bits_per_byte": 1.0518907801495685, "num_chars": 2}, {"sum_logits": -1.607529878616333, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.607529878616333, "logits_per_char": -0.8037649393081665, "bits_per_byte": 1.1595876919811436, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 345, "native_id": "Mercury_SC_401295", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.9897023439407349, "logits_per_token_corr": -0.9897023439407349, "logits_per_char_corr": -0.49485117197036743, "bits_per_byte_corr": 0.7139193317802536}, "model_output": [{"sum_logits": -1.3669148683547974, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.3669148683547974, "logits_per_char": -0.6834574341773987, "bits_per_byte": 0.9860206509471366, "num_chars": 2}, {"sum_logits": -0.9897023439407349, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": true, "logits_per_token": -0.9897023439407349, "logits_per_char": -0.49485117197036743, "bits_per_byte": 0.7139193317802536, "num_chars": 2}, {"sum_logits": -1.4147931337356567, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.4147931337356567, "logits_per_char": -0.7073965668678284, "bits_per_byte": 1.020557518962827, "num_chars": 2}, {"sum_logits": -2.0908203125, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -2.0908203125, "logits_per_char": -1.04541015625, "bits_per_byte": 1.508208048117915, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 346, "native_id": "MCAS_2012_5_23625", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2316036224365234, "logits_per_token_corr": -1.2316036224365234, "logits_per_char_corr": -0.6158018112182617, "bits_per_byte_corr": 0.8884142192156661}, "model_output": [{"sum_logits": -1.5372657775878906, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.5372657775878906, "logits_per_char": -0.7686328887939453, "bits_per_byte": 1.1089028569279797, "num_chars": 2}, {"sum_logits": -1.215383529663086, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.215383529663086, "logits_per_char": -0.607691764831543, "bits_per_byte": 0.8767138955121591, "num_chars": 2}, {"sum_logits": -1.2316036224365234, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.2316036224365234, "logits_per_char": -0.6158018112182617, "bits_per_byte": 0.8884142192156661, "num_chars": 2}, {"sum_logits": -1.7188148498535156, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.7188148498535156, "logits_per_char": -0.8594074249267578, "bits_per_byte": 1.2398628300458783, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 347, "native_id": "Mercury_7268048", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5068185329437256, "logits_per_token_corr": -1.5068185329437256, "logits_per_char_corr": -0.7534092664718628, "bits_per_byte_corr": 1.0869398124995289}, "model_output": [{"sum_logits": -1.2978341579437256, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.2978341579437256, "logits_per_char": -0.6489170789718628, "bits_per_byte": 0.9361894517815309, "num_chars": 2}, {"sum_logits": -1.0473802089691162, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.0473802089691162, "logits_per_char": -0.5236901044845581, "bits_per_byte": 0.7555251167030379, "num_chars": 2}, {"sum_logits": -1.5068185329437256, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.5068185329437256, "logits_per_char": -0.7534092664718628, "bits_per_byte": 1.0869398124995289, "num_chars": 2}, {"sum_logits": -1.9414002895355225, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.9414002895355225, "logits_per_char": -0.9707001447677612, "bits_per_byte": 1.400424285047654, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 348, "native_id": "Mercury_SC_402629", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3919434547424316, "logits_per_token_corr": -1.3919434547424316, "logits_per_char_corr": -0.6959717273712158, "bits_per_byte_corr": 1.0040749596781}, "model_output": [{"sum_logits": -1.3919434547424316, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3919434547424316, "logits_per_char": -0.6959717273712158, "bits_per_byte": 1.0040749596781, "num_chars": 2}, {"sum_logits": -1.160261631011963, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.160261631011963, "logits_per_char": -0.5801308155059814, "bits_per_byte": 0.8369518505979509, "num_chars": 2}, {"sum_logits": -1.3131756782531738, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3131756782531738, "logits_per_char": -0.6565878391265869, "bits_per_byte": 0.947256019416608, "num_chars": 2}, {"sum_logits": -1.8710007667541504, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.8710007667541504, "logits_per_char": -0.9355003833770752, "bits_per_byte": 1.3496417638488, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 349, "native_id": "NCEOGA_2013_8_42", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.652791976928711, "logits_per_token_corr": -1.652791976928711, "logits_per_char_corr": -0.8263959884643555, "bits_per_byte_corr": 1.1922373943689153}, "model_output": [{"sum_logits": -1.4469165802001953, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4469165802001953, "logits_per_char": -0.7234582901000977, "bits_per_byte": 1.0437296874181698, "num_chars": 2}, {"sum_logits": -1.1134376525878906, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.1134376525878906, "logits_per_char": -0.5567188262939453, "bits_per_byte": 0.8031754898643761, "num_chars": 2}, {"sum_logits": -1.4582290649414062, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4582290649414062, "logits_per_char": -0.7291145324707031, "bits_per_byte": 1.0518899202363141, "num_chars": 2}, {"sum_logits": -1.652791976928711, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.652791976928711, "logits_per_char": -0.8263959884643555, "bits_per_byte": 1.1922373943689153, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 350, "native_id": "Mercury_412463", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7194695472717285, "logits_per_token_corr": -1.7194695472717285, "logits_per_char_corr": -0.8597347736358643, "bits_per_byte_corr": 1.2403350944051479}, "model_output": [{"sum_logits": -1.7595429420471191, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.7595429420471191, "logits_per_char": -0.8797714710235596, "bits_per_byte": 1.2692419383621896, "num_chars": 2}, {"sum_logits": -1.69651460647583, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.69651460647583, "logits_per_char": -0.848257303237915, "bits_per_byte": 1.223776604780065, "num_chars": 2}, {"sum_logits": -1.8576531410217285, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.8576531410217285, "logits_per_char": -0.9288265705108643, "bits_per_byte": 1.3400134871228897, "num_chars": 2}, {"sum_logits": -1.7194695472717285, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.7194695472717285, "logits_per_char": -0.8597347736358643, "bits_per_byte": 1.2403350944051479, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 351, "native_id": "Mercury_409295", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0632636547088623, "logits_per_token_corr": -1.0632636547088623, "logits_per_char_corr": -0.5316318273544312, "bits_per_byte_corr": 0.7669826009035264}, "model_output": [{"sum_logits": -1.2138144969940186, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.2138144969940186, "logits_per_char": -0.6069072484970093, "bits_per_byte": 0.8755820776868302, "num_chars": 2}, {"sum_logits": -1.0632636547088623, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.0632636547088623, "logits_per_char": -0.5316318273544312, "bits_per_byte": 0.7669826009035264, "num_chars": 2}, {"sum_logits": -1.5519835948944092, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.5519835948944092, "logits_per_char": -0.7759917974472046, "bits_per_byte": 1.1195195179483994, "num_chars": 2}, {"sum_logits": -2.034640073776245, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -2.034640073776245, "logits_per_char": -1.0173200368881226, "bits_per_byte": 1.467682572216526, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 352, "native_id": "Mercury_404609", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9164068698883057, "logits_per_token_corr": -1.9164068698883057, "logits_per_char_corr": -0.9582034349441528, "bits_per_byte_corr": 1.382395343757693}, "model_output": [{"sum_logits": -1.490450143814087, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.490450143814087, "logits_per_char": -0.7452250719070435, "bits_per_byte": 1.0751325155871851, "num_chars": 2}, {"sum_logits": -1.046783685684204, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.046783685684204, "logits_per_char": -0.523391842842102, "bits_per_byte": 0.7550948161105788, "num_chars": 2}, {"sum_logits": -1.3405992984771729, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3405992984771729, "logits_per_char": -0.6702996492385864, "bits_per_byte": 0.9670379798668151, "num_chars": 2}, {"sum_logits": -1.9164068698883057, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.9164068698883057, "logits_per_char": -0.9582034349441528, "bits_per_byte": 1.382395343757693, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 353, "native_id": "Mercury_7230090", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2383372783660889, "logits_per_token_corr": -1.2383372783660889, "logits_per_char_corr": -0.6191686391830444, "bits_per_byte_corr": 0.8932715252239879}, "model_output": [{"sum_logits": -1.5240809917449951, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.5240809917449951, "logits_per_char": -0.7620404958724976, "bits_per_byte": 1.0993920443526086, "num_chars": 2}, {"sum_logits": -1.2679030895233154, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.2679030895233154, "logits_per_char": -0.6339515447616577, "bits_per_byte": 0.9145987497921984, "num_chars": 2}, {"sum_logits": -1.2383372783660889, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.2383372783660889, "logits_per_char": -0.6191686391830444, "bits_per_byte": 0.8932715252239879, "num_chars": 2}, {"sum_logits": -1.6638667583465576, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.6638667583465576, "logits_per_char": -0.8319333791732788, "bits_per_byte": 1.2002261604841493, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 354, "native_id": "Mercury_7057488", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9306212663650513, "logits_per_token_corr": -1.9306212663650513, "logits_per_char_corr": -0.9653106331825256, "bits_per_byte_corr": 1.3926488634108154}, "model_output": [{"sum_logits": -1.2289782762527466, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.2289782762527466, "logits_per_char": -0.6144891381263733, "bits_per_byte": 0.8865204322556889, "num_chars": 2}, {"sum_logits": -1.1646262407302856, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.1646262407302856, "logits_per_char": -0.5823131203651428, "bits_per_byte": 0.8401002509959732, "num_chars": 2}, {"sum_logits": -1.4583579301834106, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4583579301834106, "logits_per_char": -0.7291789650917053, "bits_per_byte": 1.0519828768591055, "num_chars": 2}, {"sum_logits": -1.9306212663650513, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.9306212663650513, "logits_per_char": -0.9653106331825256, "bits_per_byte": 1.3926488634108154, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 355, "native_id": "MDSA_2009_4_1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.73653244972229, "logits_per_token_corr": -1.73653244972229, "logits_per_char_corr": -0.868266224861145, "bits_per_byte_corr": 1.2526433767794554}, "model_output": [{"sum_logits": -1.5490858554840088, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.5490858554840088, "logits_per_char": -0.7745429277420044, "bits_per_byte": 1.1174292408098112, "num_chars": 2}, {"sum_logits": -1.315796136856079, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.315796136856079, "logits_per_char": -0.6578980684280396, "bits_per_byte": 0.9491462807322425, "num_chars": 2}, {"sum_logits": -1.1383936405181885, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.1383936405181885, "logits_per_char": -0.5691968202590942, "bits_per_byte": 0.8211774298781519, "num_chars": 2}, {"sum_logits": -1.73653244972229, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.73653244972229, "logits_per_char": -0.868266224861145, "bits_per_byte": 1.2526433767794554, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 356, "native_id": "Mercury_7150728", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.45087468624115, "logits_per_token_corr": -1.45087468624115, "logits_per_char_corr": -0.725437343120575, "bits_per_byte_corr": 1.0465848573964707}, "model_output": [{"sum_logits": -1.45087468624115, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.45087468624115, "logits_per_char": -0.725437343120575, "bits_per_byte": 1.0465848573964707, "num_chars": 2}, {"sum_logits": -1.231258749961853, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.231258749961853, "logits_per_char": -0.6156293749809265, "bits_per_byte": 0.8881654463111929, "num_chars": 2}, {"sum_logits": -1.2170108556747437, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": true, "logits_per_token": -1.2170108556747437, "logits_per_char": -0.6085054278373718, "bits_per_byte": 0.877887763095624, "num_chars": 2}, {"sum_logits": -1.781694769859314, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.781694769859314, "logits_per_char": -0.890847384929657, "bits_per_byte": 1.285221104427841, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 357, "native_id": "Mercury_402207", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1719132661819458, "logits_per_token_corr": -1.1719132661819458, "logits_per_char_corr": -0.5859566330909729, "bits_per_byte_corr": 0.8453567287369478}, "model_output": [{"sum_logits": -1.1537419557571411, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": true, "logits_per_token": -1.1537419557571411, "logits_per_char": -0.5768709778785706, "bits_per_byte": 0.8322488990187785, "num_chars": 2}, {"sum_logits": -1.1719132661819458, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.1719132661819458, "logits_per_char": -0.5859566330909729, "bits_per_byte": 0.8453567287369478, "num_chars": 2}, {"sum_logits": -1.4597340822219849, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.4597340822219849, "logits_per_char": -0.7298670411109924, "bits_per_byte": 1.0529755607198865, "num_chars": 2}, {"sum_logits": -2.061002731323242, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -2.061002731323242, "logits_per_char": -1.030501365661621, "bits_per_byte": 1.4866992098703933, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 358, "native_id": "Mercury_411732", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6850193738937378, "logits_per_token_corr": -1.6850193738937378, "logits_per_char_corr": -0.8425096869468689, "bits_per_byte_corr": 1.215484547260034}, "model_output": [{"sum_logits": -1.405741572380066, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.405741572380066, "logits_per_char": -0.702870786190033, "bits_per_byte": 1.014028197622816, "num_chars": 2}, {"sum_logits": -1.364024043083191, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.364024043083191, "logits_per_char": -0.6820120215415955, "bits_per_byte": 0.9839353613054236, "num_chars": 2}, {"sum_logits": -1.4227322340011597, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4227322340011597, "logits_per_char": -0.7113661170005798, "bits_per_byte": 1.0262843692539119, "num_chars": 2}, {"sum_logits": -1.6850193738937378, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.6850193738937378, "logits_per_char": -0.8425096869468689, "bits_per_byte": 1.215484547260034, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 359, "native_id": "Mercury_7270113", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.392588496208191, "logits_per_token_corr": -1.392588496208191, "logits_per_char_corr": -0.6962942481040955, "bits_per_byte_corr": 1.0045402587400096}, "model_output": [{"sum_logits": -1.450886607170105, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.450886607170105, "logits_per_char": -0.7254433035850525, "bits_per_byte": 1.0465934565290138, "num_chars": 2}, {"sum_logits": -1.0499752759933472, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.0499752759933472, "logits_per_char": -0.5249876379966736, "bits_per_byte": 0.7573970618663555, "num_chars": 2}, {"sum_logits": -1.392588496208191, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.392588496208191, "logits_per_char": -0.6962942481040955, "bits_per_byte": 1.0045402587400096, "num_chars": 2}, {"sum_logits": -1.94231116771698, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.94231116771698, "logits_per_char": -0.97115558385849, "bits_per_byte": 1.401081344765276, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 360, "native_id": "AKDE&ED_2008_8_3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.9305884838104248, "logits_per_token_corr": -0.9305884838104248, "logits_per_char_corr": -0.4652942419052124, "bits_per_byte_corr": 0.6712776953513219}, "model_output": [{"sum_logits": -1.9441955089569092, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.9441955089569092, "logits_per_char": -0.9720977544784546, "bits_per_byte": 1.402440609646371, "num_chars": 2}, {"sum_logits": -0.9305884838104248, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": true, "logits_per_token": -0.9305884838104248, "logits_per_char": -0.4652942419052124, "bits_per_byte": 0.6712776953513219, "num_chars": 2}, {"sum_logits": -1.1847751140594482, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.1847751140594482, "logits_per_char": -0.5923875570297241, "bits_per_byte": 0.854634590811725, "num_chars": 2}, {"sum_logits": -1.9288585186004639, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.9288585186004639, "logits_per_char": -0.9644292593002319, "bits_per_byte": 1.3913773096816604, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 361, "native_id": "MCAS_1999_8_1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.333127498626709, "logits_per_token_corr": -1.333127498626709, "logits_per_char_corr": -0.6665637493133545, "bits_per_byte_corr": 0.9616482155714215}, "model_output": [{"sum_logits": -1.333127498626709, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.333127498626709, "logits_per_char": -0.6665637493133545, "bits_per_byte": 0.9616482155714215, "num_chars": 2}, {"sum_logits": -1.0764899253845215, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.0764899253845215, "logits_per_char": -0.5382449626922607, "bits_per_byte": 0.7765233384601475, "num_chars": 2}, {"sum_logits": -1.5239291191101074, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.5239291191101074, "logits_per_char": -0.7619645595550537, "bits_per_byte": 1.099282491404009, "num_chars": 2}, {"sum_logits": -1.814624309539795, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.814624309539795, "logits_per_char": -0.9073121547698975, "bits_per_byte": 1.308974746225751, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 362, "native_id": "NYSEDREGENTS_2015_4_24", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3589413166046143, "logits_per_token_corr": -1.3589413166046143, "logits_per_char_corr": -0.6794706583023071, "bits_per_byte_corr": 0.9802689491630021}, "model_output": [{"sum_logits": -1.3032200336456299, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3032200336456299, "logits_per_char": -0.6516100168228149, "bits_per_byte": 0.9400745398645245, "num_chars": 2}, {"sum_logits": -1.2081711292266846, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2081711292266846, "logits_per_char": -0.6040855646133423, "bits_per_byte": 0.8715112483409045, "num_chars": 2}, {"sum_logits": -1.3589413166046143, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3589413166046143, "logits_per_char": -0.6794706583023071, "bits_per_byte": 0.9802689491630021, "num_chars": 2}, {"sum_logits": -1.8457176685333252, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.8457176685333252, "logits_per_char": -0.9228588342666626, "bits_per_byte": 1.3314038636380405, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 363, "native_id": "Mercury_7122640", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.476189136505127, "logits_per_token_corr": -1.476189136505127, "logits_per_char_corr": -0.7380945682525635, "bits_per_byte_corr": 1.064845373325819}, "model_output": [{"sum_logits": -1.476189136505127, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.476189136505127, "logits_per_char": -0.7380945682525635, "bits_per_byte": 1.064845373325819, "num_chars": 2}, {"sum_logits": -1.2963871955871582, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.2963871955871582, "logits_per_char": -0.6481935977935791, "bits_per_byte": 0.9351456890734438, "num_chars": 2}, {"sum_logits": -1.2679028511047363, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2679028511047363, "logits_per_char": -0.6339514255523682, "bits_per_byte": 0.9145985778095476, "num_chars": 2}, {"sum_logits": -1.6870675086975098, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.6870675086975098, "logits_per_char": -0.8435337543487549, "bits_per_byte": 1.216961964222272, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 364, "native_id": "Mercury_402547", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.9053143858909607, "logits_per_token_corr": -0.9053143858909607, "logits_per_char_corr": -0.45265719294548035, "bits_per_byte_corr": 0.6530462874856323}, "model_output": [{"sum_logits": -2.0745363235473633, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -2.0745363235473633, "logits_per_char": -1.0372681617736816, "bits_per_byte": 1.4964616330639766, "num_chars": 2}, {"sum_logits": -1.722153663635254, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.722153663635254, "logits_per_char": -0.861076831817627, "bits_per_byte": 1.242271275088563, "num_chars": 2}, {"sum_logits": -0.9053143858909607, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": true, "logits_per_token": -0.9053143858909607, "logits_per_char": -0.45265719294548035, "bits_per_byte": 0.6530462874856323, "num_chars": 2}, {"sum_logits": -1.8704252243041992, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.8704252243041992, "logits_per_char": -0.9352126121520996, "bits_per_byte": 1.349226597729617, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 365, "native_id": "Mercury_7133945", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1512787342071533, "logits_per_token_corr": -1.1512787342071533, "logits_per_char_corr": -0.5756393671035767, "bits_per_byte_corr": 0.8304720602613882}, "model_output": [{"sum_logits": -1.1772778034210205, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.1772778034210205, "logits_per_char": -0.5886389017105103, "bits_per_byte": 0.8492264243726892, "num_chars": 2}, {"sum_logits": -1.1512787342071533, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.1512787342071533, "logits_per_char": -0.5756393671035767, "bits_per_byte": 0.8304720602613882, "num_chars": 2}, {"sum_logits": -1.526911973953247, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.526911973953247, "logits_per_char": -0.7634559869766235, "bits_per_byte": 1.101434166348955, "num_chars": 2}, {"sum_logits": -1.9764511585235596, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.9764511585235596, "logits_per_char": -0.9882255792617798, "bits_per_byte": 1.4257081424816171, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 366, "native_id": "Mercury_7199028", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3466638326644897, "logits_per_token_corr": -1.3466638326644897, "logits_per_char_corr": -0.6733319163322449, "bits_per_byte_corr": 0.97141261656549}, "model_output": [{"sum_logits": -1.2369436025619507, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.2369436025619507, "logits_per_char": -0.6184718012809753, "bits_per_byte": 0.8922662006383686, "num_chars": 2}, {"sum_logits": -1.3466638326644897, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3466638326644897, "logits_per_char": -0.6733319163322449, "bits_per_byte": 0.97141261656549, "num_chars": 2}, {"sum_logits": -1.2973664999008179, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.2973664999008179, "logits_per_char": -0.6486832499504089, "bits_per_byte": 0.9358521078118632, "num_chars": 2}, {"sum_logits": -1.879834532737732, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.879834532737732, "logits_per_char": -0.939917266368866, "bits_per_byte": 1.356013979037248, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 367, "native_id": "Mercury_7217298", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3922607898712158, "logits_per_token_corr": -1.3922607898712158, "logits_per_char_corr": -0.6961303949356079, "bits_per_byte_corr": 1.0043038685863985}, "model_output": [{"sum_logits": -1.3922607898712158, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3922607898712158, "logits_per_char": -0.6961303949356079, "bits_per_byte": 1.0043038685863985, "num_chars": 2}, {"sum_logits": -1.2545864582061768, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.2545864582061768, "logits_per_char": -0.6272932291030884, "bits_per_byte": 0.9049928308109002, "num_chars": 2}, {"sum_logits": -1.3324120044708252, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3324120044708252, "logits_per_char": -0.6662060022354126, "bits_per_byte": 0.961132095636182, "num_chars": 2}, {"sum_logits": -1.6947987079620361, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.6947987079620361, "logits_per_char": -0.8473993539810181, "bits_per_byte": 1.2225388456418043, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 368, "native_id": "Mercury_7057680", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.388089656829834, "logits_per_token_corr": -1.388089656829834, "logits_per_char_corr": -0.694044828414917, "bits_per_byte_corr": 1.0012950321095517}, "model_output": [{"sum_logits": -1.322202205657959, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.322202205657959, "logits_per_char": -0.6611011028289795, "bits_per_byte": 0.9537672825782786, "num_chars": 2}, {"sum_logits": -1.0485472679138184, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.0485472679138184, "logits_per_char": -0.5242736339569092, "bits_per_byte": 0.7563669717790119, "num_chars": 2}, {"sum_logits": -1.388089656829834, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.388089656829834, "logits_per_char": -0.694044828414917, "bits_per_byte": 1.0012950321095517, "num_chars": 2}, {"sum_logits": -2.106544017791748, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -2.106544017791748, "logits_per_char": -1.053272008895874, "bits_per_byte": 1.5195503039423253, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 369, "native_id": "Mercury_SC_400404", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6792378425598145, "logits_per_token_corr": -1.6792378425598145, "logits_per_char_corr": -0.8396189212799072, "bits_per_byte_corr": 1.2113140539679335}, "model_output": [{"sum_logits": -1.6618084907531738, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.6618084907531738, "logits_per_char": -0.8309042453765869, "bits_per_byte": 1.1987414342592495, "num_chars": 2}, {"sum_logits": -1.0409588813781738, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.0409588813781738, "logits_per_char": -0.5204794406890869, "bits_per_byte": 0.7508931079673465, "num_chars": 2}, {"sum_logits": -1.3449902534484863, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3449902534484863, "logits_per_char": -0.6724951267242432, "bits_per_byte": 0.9702053843477576, "num_chars": 2}, {"sum_logits": -1.6792378425598145, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.6792378425598145, "logits_per_char": -0.8396189212799072, "bits_per_byte": 1.2113140539679335, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 370, "native_id": "Mercury_SC_408030", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3262470960617065, "logits_per_token_corr": -1.3262470960617065, "logits_per_char_corr": -0.6631235480308533, "bits_per_byte_corr": 0.9566850542414936}, "model_output": [{"sum_logits": -1.3262470960617065, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3262470960617065, "logits_per_char": -0.6631235480308533, "bits_per_byte": 0.9566850542414936, "num_chars": 2}, {"sum_logits": -1.24364173412323, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.24364173412323, "logits_per_char": -0.621820867061615, "bits_per_byte": 0.8970978812317119, "num_chars": 2}, {"sum_logits": -1.2852410078048706, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.2852410078048706, "logits_per_char": -0.6426205039024353, "bits_per_byte": 0.9271054141542763, "num_chars": 2}, {"sum_logits": -1.8837765455245972, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.8837765455245972, "logits_per_char": -0.9418882727622986, "bits_per_byte": 1.3588575401866156, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 371, "native_id": "Mercury_415083", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6939241886138916, "logits_per_token_corr": -1.6939241886138916, "logits_per_char_corr": -0.8469620943069458, "bits_per_byte_corr": 1.221908013278439}, "model_output": [{"sum_logits": -1.7006800174713135, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.7006800174713135, "logits_per_char": -0.8503400087356567, "bits_per_byte": 1.2267813136732912, "num_chars": 2}, {"sum_logits": -1.6939241886138916, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.6939241886138916, "logits_per_char": -0.8469620943069458, "bits_per_byte": 1.221908013278439, "num_chars": 2}, {"sum_logits": -1.1748106479644775, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.1748106479644775, "logits_per_char": -0.5874053239822388, "bits_per_byte": 0.8474467479015596, "num_chars": 2}, {"sum_logits": -1.24886155128479, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.24886155128479, "logits_per_char": -0.624430775642395, "bits_per_byte": 0.9008631833983795, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 372, "native_id": "Mercury_409114", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.373842716217041, "logits_per_token_corr": -1.373842716217041, "logits_per_char_corr": -0.6869213581085205, "bits_per_byte_corr": 0.9910180368245862}, "model_output": [{"sum_logits": -1.6918816566467285, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.6918816566467285, "logits_per_char": -0.8459408283233643, "bits_per_byte": 1.2204346379084963, "num_chars": 2}, {"sum_logits": -1.373842716217041, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.373842716217041, "logits_per_char": -0.6869213581085205, "bits_per_byte": 0.9910180368245862, "num_chars": 2}, {"sum_logits": -1.2498154640197754, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.2498154640197754, "logits_per_char": -0.6249077320098877, "bits_per_byte": 0.901551285984482, "num_chars": 2}, {"sum_logits": -1.4076313972473145, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4076313972473145, "logits_per_char": -0.7038156986236572, "bits_per_byte": 1.015391418104881, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 373, "native_id": "Mercury_SC_415006", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1257622241973877, "logits_per_token_corr": -1.1257622241973877, "logits_per_char_corr": -0.5628811120986938, "bits_per_byte_corr": 0.8120657890354337}, "model_output": [{"sum_logits": -1.1257622241973877, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.1257622241973877, "logits_per_char": -0.5628811120986938, "bits_per_byte": 0.8120657890354337, "num_chars": 2}, {"sum_logits": -1.0159103870391846, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.0159103870391846, "logits_per_char": -0.5079551935195923, "bits_per_byte": 0.732824438685036, "num_chars": 2}, {"sum_logits": -1.5359470844268799, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.5359470844268799, "logits_per_char": -0.7679735422134399, "bits_per_byte": 1.1079516208860567, "num_chars": 2}, {"sum_logits": -2.3989670276641846, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -2.3989670276641846, "logits_per_char": -1.1994835138320923, "bits_per_byte": 1.7304889170348712, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 374, "native_id": "MSA_2012_5_15", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3043160438537598, "logits_per_token_corr": -1.3043160438537598, "logits_per_char_corr": -0.6521580219268799, "bits_per_byte_corr": 0.9408651441105415}, "model_output": [{"sum_logits": -1.340038776397705, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.340038776397705, "logits_per_char": -0.6700193881988525, "bits_per_byte": 0.9666336486546363, "num_chars": 2}, {"sum_logits": -1.1739506721496582, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": true, "logits_per_token": -1.1739506721496582, "logits_per_char": -0.5869753360748291, "bits_per_byte": 0.846826406479897, "num_chars": 2}, {"sum_logits": -1.3043160438537598, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.3043160438537598, "logits_per_char": -0.6521580219268799, "bits_per_byte": 0.9408651441105415, "num_chars": 2}, {"sum_logits": -1.968376636505127, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.968376636505127, "logits_per_char": -0.9841883182525635, "bits_per_byte": 1.4198836060448423, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 375, "native_id": "Mercury_SC_402612", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6932555437088013, "logits_per_token_corr": -1.6932555437088013, "logits_per_char_corr": -0.8466277718544006, "bits_per_byte_corr": 1.2214256879340941}, "model_output": [{"sum_logits": -1.4278994798660278, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4278994798660278, "logits_per_char": -0.7139497399330139, "bits_per_byte": 1.0300117492460643, "num_chars": 2}, {"sum_logits": -1.277690052986145, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.277690052986145, "logits_per_char": -0.6388450264930725, "bits_per_byte": 0.9216585516187963, "num_chars": 2}, {"sum_logits": -1.24274742603302, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.24274742603302, "logits_per_char": -0.62137371301651, "bits_per_byte": 0.8964527743083249, "num_chars": 2}, {"sum_logits": -1.6932555437088013, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.6932555437088013, "logits_per_char": -0.8466277718544006, "bits_per_byte": 1.2214256879340941, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 376, "native_id": "Mercury_SC_405937", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.27030348777771, "logits_per_token_corr": -1.27030348777771, "logits_per_char_corr": -0.635151743888855, "bits_per_byte_corr": 0.9163302711210863}, "model_output": [{"sum_logits": -1.4867322444915771, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4867322444915771, "logits_per_char": -0.7433661222457886, "bits_per_byte": 1.0724506181296285, "num_chars": 2}, {"sum_logits": -1.27030348777771, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.27030348777771, "logits_per_char": -0.635151743888855, "bits_per_byte": 0.9163302711210863, "num_chars": 2}, {"sum_logits": -1.2153146266937256, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2153146266937256, "logits_per_char": -0.6076573133468628, "bits_per_byte": 0.8766641925260596, "num_chars": 2}, {"sum_logits": -1.7003381252288818, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.7003381252288818, "logits_per_char": -0.8501690626144409, "bits_per_byte": 1.2265346905519536, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 377, "native_id": "Mercury_SC_416459", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4198715686798096, "logits_per_token_corr": -1.4198715686798096, "logits_per_char_corr": -0.7099357843399048, "bits_per_byte_corr": 1.024220835417533}, "model_output": [{"sum_logits": -1.5279724597930908, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5279724597930908, "logits_per_char": -0.7639862298965454, "bits_per_byte": 1.1021991451799935, "num_chars": 2}, {"sum_logits": -1.2305233478546143, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2305233478546143, "logits_per_char": -0.6152616739273071, "bits_per_byte": 0.8876349658246061, "num_chars": 2}, {"sum_logits": -1.4198715686798096, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4198715686798096, "logits_per_char": -0.7099357843399048, "bits_per_byte": 1.024220835417533, "num_chars": 2}, {"sum_logits": -1.4730637073516846, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4730637073516846, "logits_per_char": -0.7365318536758423, "bits_per_byte": 1.0625908527556567, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 378, "native_id": "NAEP_2000_8_S21+4", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2789583206176758, "logits_per_token_corr": -1.2789583206176758, "logits_per_char_corr": -0.6394791603088379, "bits_per_byte_corr": 0.9225734133300617}, "model_output": [{"sum_logits": -1.3325262069702148, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.3325262069702148, "logits_per_char": -0.6662631034851074, "bits_per_byte": 0.9612144753259453, "num_chars": 2}, {"sum_logits": -1.3425111770629883, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.3425111770629883, "logits_per_char": -0.6712555885314941, "bits_per_byte": 0.9684171087440849, "num_chars": 2}, {"sum_logits": -1.2789583206176758, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.2789583206176758, "logits_per_char": -0.6394791603088379, "bits_per_byte": 0.9225734133300617, "num_chars": 2}, {"sum_logits": -1.7421579360961914, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.7421579360961914, "logits_per_char": -0.8710789680480957, "bits_per_byte": 1.2567013074265663, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 379, "native_id": "Mercury_7072380", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2432916164398193, "logits_per_token_corr": -1.2432916164398193, "logits_per_char_corr": -0.6216458082199097, "bits_per_byte_corr": 0.8968453247089196}, "model_output": [{"sum_logits": -1.3507153987884521, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3507153987884521, "logits_per_char": -0.6753576993942261, "bits_per_byte": 0.9743352037429293, "num_chars": 2}, {"sum_logits": -1.2432916164398193, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2432916164398193, "logits_per_char": -0.6216458082199097, "bits_per_byte": 0.8968453247089196, "num_chars": 2}, {"sum_logits": -1.2777345180511475, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.2777345180511475, "logits_per_char": -0.6388672590255737, "bits_per_byte": 0.9216906263831822, "num_chars": 2}, {"sum_logits": -1.9745938777923584, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.9745938777923584, "logits_per_char": -0.9872969388961792, "bits_per_byte": 1.424368397631395, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 380, "native_id": "Mercury_SC_401373", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3818252086639404, "logits_per_token_corr": -1.3818252086639404, "logits_per_char_corr": -0.6909126043319702, "bits_per_byte_corr": 0.9967761879581281}, "model_output": [{"sum_logits": -1.3818252086639404, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.3818252086639404, "logits_per_char": -0.6909126043319702, "bits_per_byte": 0.9967761879581281, "num_chars": 2}, {"sum_logits": -1.1646068096160889, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.1646068096160889, "logits_per_char": -0.5823034048080444, "bits_per_byte": 0.8400862344099278, "num_chars": 2}, {"sum_logits": -1.4372718334197998, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4372718334197998, "logits_per_char": -0.7186359167098999, "bits_per_byte": 1.0367724732428116, "num_chars": 2}, {"sum_logits": -1.6737563610076904, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.6737563610076904, "logits_per_char": -0.8368781805038452, "bits_per_byte": 1.2073600008419436, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 381, "native_id": "Mercury_SC_400579", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2344679832458496, "logits_per_token_corr": -1.2344679832458496, "logits_per_char_corr": -0.6172339916229248, "bits_per_byte_corr": 0.8904804187831333}, "model_output": [{"sum_logits": -1.497983455657959, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.497983455657959, "logits_per_char": -0.7489917278289795, "bits_per_byte": 1.0805666514065013, "num_chars": 2}, {"sum_logits": -1.2708945274353027, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.2708945274353027, "logits_per_char": -0.6354472637176514, "bits_per_byte": 0.9167566161125755, "num_chars": 2}, {"sum_logits": -1.2344679832458496, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.2344679832458496, "logits_per_char": -0.6172339916229248, "bits_per_byte": 0.8904804187831333, "num_chars": 2}, {"sum_logits": -1.6466403007507324, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.6466403007507324, "logits_per_char": -0.8233201503753662, "bits_per_byte": 1.1877998980113498, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 382, "native_id": "MCAS_2003_5_14", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5178714990615845, "logits_per_token_corr": -1.5178714990615845, "logits_per_char_corr": -0.7589357495307922, "bits_per_byte_corr": 1.094912842202209}, "model_output": [{"sum_logits": -1.1813427209854126, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.1813427209854126, "logits_per_char": -0.5906713604927063, "bits_per_byte": 0.8521586425785768, "num_chars": 2}, {"sum_logits": -0.9862247705459595, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -0.9862247705459595, "logits_per_char": -0.49311238527297974, "bits_per_byte": 0.7114107928347669, "num_chars": 2}, {"sum_logits": -1.5178714990615845, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.5178714990615845, "logits_per_char": -0.7589357495307922, "bits_per_byte": 1.094912842202209, "num_chars": 2}, {"sum_logits": -2.3882689476013184, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -2.3882689476013184, "logits_per_char": -1.1941344738006592, "bits_per_byte": 1.7227718835080006, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 383, "native_id": "MSA_2015_8_30", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3430750370025635, "logits_per_token_corr": -1.3430750370025635, "logits_per_char_corr": -0.6715375185012817, "bits_per_byte_corr": 0.9688238477133757}, "model_output": [{"sum_logits": -1.3430750370025635, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.3430750370025635, "logits_per_char": -0.6715375185012817, "bits_per_byte": 0.9688238477133757, "num_chars": 2}, {"sum_logits": -1.0048658847808838, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.0048658847808838, "logits_per_char": -0.5024329423904419, "bits_per_byte": 0.7248575143664615, "num_chars": 2}, {"sum_logits": -1.4631998538970947, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.4631998538970947, "logits_per_char": -0.7315999269485474, "bits_per_byte": 1.0554755865241554, "num_chars": 2}, {"sum_logits": -2.0353548526763916, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -2.0353548526763916, "logits_per_char": -1.0176774263381958, "bits_per_byte": 1.4681981762038132, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 384, "native_id": "Mercury_SC_415416", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3145010471343994, "logits_per_token_corr": -1.3145010471343994, "logits_per_char_corr": -0.6572505235671997, "bits_per_byte_corr": 0.9482120709727551}, "model_output": [{"sum_logits": -1.3098585605621338, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.3098585605621338, "logits_per_char": -0.6549292802810669, "bits_per_byte": 0.9448632247951521, "num_chars": 2}, {"sum_logits": -1.1038095951080322, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.1038095951080322, "logits_per_char": -0.5519047975540161, "bits_per_byte": 0.7962303144745784, "num_chars": 2}, {"sum_logits": -1.3145010471343994, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.3145010471343994, "logits_per_char": -0.6572505235671997, "bits_per_byte": 0.9482120709727551, "num_chars": 2}, {"sum_logits": -2.1578009128570557, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -2.1578009128570557, "logits_per_char": -1.0789004564285278, "bits_per_byte": 1.5565243381033944, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 385, "native_id": "NYSEDREGENTS_2012_8_42", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1677802801132202, "logits_per_token_corr": -1.1677802801132202, "logits_per_char_corr": -0.5838901400566101, "bits_per_byte_corr": 0.8423754094842388}, "model_output": [{"sum_logits": -1.1751407384872437, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.1751407384872437, "logits_per_char": -0.5875703692436218, "bits_per_byte": 0.8476848578816794, "num_chars": 2}, {"sum_logits": -1.1677802801132202, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.1677802801132202, "logits_per_char": -0.5838901400566101, "bits_per_byte": 0.8423754094842388, "num_chars": 2}, {"sum_logits": -1.4935859441757202, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4935859441757202, "logits_per_char": -0.7467929720878601, "bits_per_byte": 1.0773945174026598, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 386, "native_id": "NCEOGA_2013_5_9", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2850630283355713, "logits_per_token_corr": -1.2850630283355713, "logits_per_char_corr": -0.6425315141677856, "bits_per_byte_corr": 0.926977029105407}, "model_output": [{"sum_logits": -1.4725172519683838, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4725172519683838, "logits_per_char": -0.7362586259841919, "bits_per_byte": 1.062196668519879, "num_chars": 2}, {"sum_logits": -1.2850630283355713, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.2850630283355713, "logits_per_char": -0.6425315141677856, "bits_per_byte": 0.926977029105407, "num_chars": 2}, {"sum_logits": -1.1240026950836182, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.1240026950836182, "logits_per_char": -0.5620013475418091, "bits_per_byte": 0.8107965570720653, "num_chars": 2}, {"sum_logits": -1.842937707901001, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.842937707901001, "logits_per_char": -0.9214688539505005, "bits_per_byte": 1.3293985459289785, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 387, "native_id": "MEAP_2005_8_45", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9444751739501953, "logits_per_token_corr": -1.9444751739501953, "logits_per_char_corr": -0.9722375869750977, "bits_per_byte_corr": 1.4026423452958334}, "model_output": [{"sum_logits": -1.335256576538086, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.335256576538086, "logits_per_char": -0.667628288269043, "bits_per_byte": 0.9631840206436276, "num_chars": 2}, {"sum_logits": -1.211812973022461, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": true, "logits_per_token": -1.211812973022461, "logits_per_char": -0.6059064865112305, "bits_per_byte": 0.8741382833328358, "num_chars": 2}, {"sum_logits": -1.2838630676269531, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.2838630676269531, "logits_per_char": -0.6419315338134766, "bits_per_byte": 0.9261114404236139, "num_chars": 2}, {"sum_logits": -1.9444751739501953, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.9444751739501953, "logits_per_char": -0.9722375869750977, "bits_per_byte": 1.4026423452958334, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 388, "native_id": "Mercury_SC_400594", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9271360635757446, "logits_per_token_corr": -1.9271360635757446, "logits_per_char_corr": -0.9635680317878723, "bits_per_byte_corr": 1.3901348210205011}, "model_output": [{"sum_logits": -1.3228803873062134, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3228803873062134, "logits_per_char": -0.6614401936531067, "bits_per_byte": 0.9542564872286582, "num_chars": 2}, {"sum_logits": -1.2065664529800415, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.2065664529800415, "logits_per_char": -0.6032832264900208, "bits_per_byte": 0.8703537191092715, "num_chars": 2}, {"sum_logits": -1.2957369089126587, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.2957369089126587, "logits_per_char": -0.6478684544563293, "bits_per_byte": 0.9346766063932151, "num_chars": 2}, {"sum_logits": -1.9271360635757446, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.9271360635757446, "logits_per_char": -0.9635680317878723, "bits_per_byte": 1.3901348210205011, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 389, "native_id": "NCEOGA_2013_8_43", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.114922285079956, "logits_per_token_corr": -1.114922285079956, "logits_per_char_corr": -0.557461142539978, "bits_per_byte_corr": 0.8042464258312997}, "model_output": [{"sum_logits": -1.114922285079956, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.114922285079956, "logits_per_char": -0.557461142539978, "bits_per_byte": 0.8042464258312997, "num_chars": 2}, {"sum_logits": -1.2225215435028076, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.2225215435028076, "logits_per_char": -0.6112607717514038, "bits_per_byte": 0.8818628840963444, "num_chars": 2}, {"sum_logits": -1.468886137008667, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.468886137008667, "logits_per_char": -0.7344430685043335, "bits_per_byte": 1.0595773727472364, "num_chars": 2}, {"sum_logits": -2.0206973552703857, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -2.0206973552703857, "logits_per_char": -1.0103486776351929, "bits_per_byte": 1.4576250267940618, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 390, "native_id": "MCAS_2006_8_13", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3460156917572021, "logits_per_token_corr": -1.3460156917572021, "logits_per_char_corr": -0.6730078458786011, "bits_per_byte_corr": 0.9709450817291191}, "model_output": [{"sum_logits": -1.286590337753296, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.286590337753296, "logits_per_char": -0.643295168876648, "bits_per_byte": 0.928078749966835, "num_chars": 2}, {"sum_logits": -1.0630643367767334, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.0630643367767334, "logits_per_char": -0.5315321683883667, "bits_per_byte": 0.7668388234074051, "num_chars": 2}, {"sum_logits": -1.3460156917572021, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.3460156917572021, "logits_per_char": -0.6730078458786011, "bits_per_byte": 0.9709450817291191, "num_chars": 2}, {"sum_logits": -2.2163217067718506, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -2.2163217067718506, "logits_per_char": -1.1081608533859253, "bits_per_byte": 1.5987381676883046, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 391, "native_id": "Mercury_7168823", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7132642269134521, "logits_per_token_corr": -1.7132642269134521, "logits_per_char_corr": -0.8566321134567261, "bits_per_byte_corr": 1.2358589019511383}, "model_output": [{"sum_logits": -1.5160424709320068, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.5160424709320068, "logits_per_char": -0.7580212354660034, "bits_per_byte": 1.093593477296114, "num_chars": 2}, {"sum_logits": -1.0890672206878662, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": true, "logits_per_token": -1.0890672206878662, "logits_per_char": -0.5445336103439331, "bits_per_byte": 0.7855959392411198, "num_chars": 2}, {"sum_logits": -1.400348424911499, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.400348424911499, "logits_per_char": -0.7001742124557495, "bits_per_byte": 1.010137864068971, "num_chars": 2}, {"sum_logits": -1.7132642269134521, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.7132642269134521, "logits_per_char": -0.8566321134567261, "bits_per_byte": 1.2358589019511383, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 392, "native_id": "Mercury_7158935", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2816663980484009, "logits_per_token_corr": -1.2816663980484009, "logits_per_char_corr": -0.6408331990242004, "bits_per_byte_corr": 0.9245268782698883}, "model_output": [{"sum_logits": -1.5031858682632446, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.5031858682632446, "logits_per_char": -0.7515929341316223, "bits_per_byte": 1.0843193988396558, "num_chars": 2}, {"sum_logits": -1.1260496377944946, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": true, "logits_per_token": -1.1260496377944946, "logits_per_char": -0.5630248188972473, "bits_per_byte": 0.812273114121049, "num_chars": 2}, {"sum_logits": -1.2816663980484009, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.2816663980484009, "logits_per_char": -0.6408331990242004, "bits_per_byte": 0.9245268782698883, "num_chars": 2}, {"sum_logits": -1.8178335428237915, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.8178335428237915, "logits_per_char": -0.9089167714118958, "bits_per_byte": 1.3112897186976917, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 393, "native_id": "Mercury_7172708", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.9003215432167053, "logits_per_token_corr": -0.9003215432167053, "logits_per_char_corr": -0.45016077160835266, "bits_per_byte_corr": 0.6494447128025862}, "model_output": [{"sum_logits": -1.6367621421813965, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.6367621421813965, "logits_per_char": -0.8183810710906982, "bits_per_byte": 1.1806743128207968, "num_chars": 2}, {"sum_logits": -0.9003215432167053, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -0.9003215432167053, "logits_per_char": -0.45016077160835266, "bits_per_byte": 0.6494447128025862, "num_chars": 2}, {"sum_logits": -1.6893248558044434, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.6893248558044434, "logits_per_char": -0.8446624279022217, "bits_per_byte": 1.2185902959606423, "num_chars": 2}, {"sum_logits": -1.5884108543395996, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.5884108543395996, "logits_per_char": -0.7942054271697998, "bits_per_byte": 1.145796231225794, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 394, "native_id": "ACTAAP_2010_5_1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4801876544952393, "logits_per_token_corr": -1.4801876544952393, "logits_per_char_corr": -0.7400938272476196, "bits_per_byte_corr": 1.0677296943634411}, "model_output": [{"sum_logits": -1.4801876544952393, "num_tokens": 1, "num_tokens_all": 415, "is_greedy": false, "logits_per_token": -1.4801876544952393, "logits_per_char": -0.7400938272476196, "bits_per_byte": 1.0677296943634411, "num_chars": 2}, {"sum_logits": -0.9584687352180481, "num_tokens": 1, "num_tokens_all": 415, "is_greedy": true, "logits_per_token": -0.9584687352180481, "logits_per_char": -0.47923436760902405, "bits_per_byte": 0.6913890455735942, "num_chars": 2}, {"sum_logits": -1.3982746601104736, "num_tokens": 1, "num_tokens_all": 415, "is_greedy": false, "logits_per_token": -1.3982746601104736, "logits_per_char": -0.6991373300552368, "bits_per_byte": 1.0086419589717652, "num_chars": 2}, {"sum_logits": -2.0376293659210205, "num_tokens": 1, "num_tokens_all": 415, "is_greedy": false, "logits_per_token": -2.0376293659210205, "logits_per_char": -1.0188146829605103, "bits_per_byte": 1.4698388906930455, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 395, "native_id": "Mercury_7093048", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3425898551940918, "logits_per_token_corr": -1.3425898551940918, "logits_per_char_corr": -0.6712949275970459, "bits_per_byte_corr": 0.9684738630188696}, "model_output": [{"sum_logits": -1.4688162803649902, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4688162803649902, "logits_per_char": -0.7344081401824951, "bits_per_byte": 1.0595269818305335, "num_chars": 2}, {"sum_logits": -1.2573275566101074, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.2573275566101074, "logits_per_char": -0.6286637783050537, "bits_per_byte": 0.9069701153478713, "num_chars": 2}, {"sum_logits": -1.3425898551940918, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3425898551940918, "logits_per_char": -0.6712949275970459, "bits_per_byte": 0.9684738630188696, "num_chars": 2}, {"sum_logits": -1.6651568412780762, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.6651568412780762, "logits_per_char": -0.8325784206390381, "bits_per_byte": 1.2011567586079686, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 396, "native_id": "Mercury_7081603", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3856984376907349, "logits_per_token_corr": -1.3856984376907349, "logits_per_char_corr": -0.6928492188453674, "bits_per_byte_corr": 0.9995701321127218}, "model_output": [{"sum_logits": -1.5475255250930786, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.5475255250930786, "logits_per_char": -0.7737627625465393, "bits_per_byte": 1.1163037003512388, "num_chars": 2}, {"sum_logits": -0.9603101015090942, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": true, "logits_per_token": -0.9603101015090942, "logits_per_char": -0.4801550507545471, "bits_per_byte": 0.6927173105818714, "num_chars": 2}, {"sum_logits": -1.3856984376907349, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.3856984376907349, "logits_per_char": -0.6928492188453674, "bits_per_byte": 0.9995701321127218, "num_chars": 2}, {"sum_logits": -1.9359036684036255, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.9359036684036255, "logits_per_char": -0.9679518342018127, "bits_per_byte": 1.3964593110233345, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 397, "native_id": "Mercury_SC_LBS11003", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.856177568435669, "logits_per_token_corr": -1.856177568435669, "logits_per_char_corr": -0.9280887842178345, "bits_per_byte_corr": 1.338949086496699}, "model_output": [{"sum_logits": -1.3087284564971924, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3087284564971924, "logits_per_char": -0.6543642282485962, "bits_per_byte": 0.9440480270300617, "num_chars": 2}, {"sum_logits": -1.1136677265167236, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.1136677265167236, "logits_per_char": -0.5568338632583618, "bits_per_byte": 0.8033414531224589, "num_chars": 2}, {"sum_logits": -1.4735405445098877, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4735405445098877, "logits_per_char": -0.7367702722549438, "bits_per_byte": 1.0629348180573825, "num_chars": 2}, {"sum_logits": -1.856177568435669, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.856177568435669, "logits_per_char": -0.9280887842178345, "bits_per_byte": 1.338949086496699, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 398, "native_id": "MCAS_2005_8_2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.329390048980713, "logits_per_token_corr": -1.329390048980713, "logits_per_char_corr": -0.6646950244903564, "bits_per_byte_corr": 0.9589522155364942}, "model_output": [{"sum_logits": -1.391554355621338, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.391554355621338, "logits_per_char": -0.695777177810669, "bits_per_byte": 1.0037942839918919, "num_chars": 2}, {"sum_logits": -1.0081849098205566, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": true, "logits_per_token": -1.0081849098205566, "logits_per_char": -0.5040924549102783, "bits_per_byte": 0.7272516848491244, "num_chars": 2}, {"sum_logits": -1.329390048980713, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.329390048980713, "logits_per_char": -0.6646950244903564, "bits_per_byte": 0.9589522155364942, "num_chars": 2}, {"sum_logits": -2.175570011138916, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -2.175570011138916, "logits_per_char": -1.087785005569458, "bits_per_byte": 1.569342033089558, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 399, "native_id": "ACTAAP_2010_7_14", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3821799755096436, "logits_per_token_corr": -1.3821799755096436, "logits_per_char_corr": -0.6910899877548218, "bits_per_byte_corr": 0.9970320981426122}, "model_output": [{"sum_logits": -1.4020469188690186, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.4020469188690186, "logits_per_char": -0.7010234594345093, "bits_per_byte": 1.0113630684737185, "num_chars": 2}, {"sum_logits": -0.9627998471260071, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -0.9627998471260071, "logits_per_char": -0.48139992356300354, "bits_per_byte": 0.6945132824091702, "num_chars": 2}, {"sum_logits": -1.3821799755096436, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.3821799755096436, "logits_per_char": -0.6910899877548218, "bits_per_byte": 0.9970320981426122, "num_chars": 2}, {"sum_logits": -2.1831214427948, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -2.1831214427948, "logits_per_char": -1.0915607213974, "bits_per_byte": 1.5747892395903396, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 400, "native_id": "NYSEDREGENTS_2008_4_15", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4994016885757446, "logits_per_token_corr": -1.4994016885757446, "logits_per_char_corr": -0.7497008442878723, "bits_per_byte_corr": 1.0815896902051594}, "model_output": [{"sum_logits": -1.1509138345718384, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.1509138345718384, "logits_per_char": -0.5754569172859192, "bits_per_byte": 0.8302088408142425, "num_chars": 2}, {"sum_logits": -1.337633728981018, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.337633728981018, "logits_per_char": -0.668816864490509, "bits_per_byte": 0.9648987736640564, "num_chars": 2}, {"sum_logits": -1.4994016885757446, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4994016885757446, "logits_per_char": -0.7497008442878723, "bits_per_byte": 1.0815896902051594, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 401, "native_id": "Mercury_7107240", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6048145294189453, "logits_per_token_corr": -1.6048145294189453, "logits_per_char_corr": -0.8024072647094727, "bits_per_byte_corr": 1.1576289815704657}, "model_output": [{"sum_logits": -1.6048145294189453, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.6048145294189453, "logits_per_char": -0.8024072647094727, "bits_per_byte": 1.1576289815704657, "num_chars": 2}, {"sum_logits": -1.2377090454101562, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.2377090454101562, "logits_per_char": -0.6188545227050781, "bits_per_byte": 0.892818350938964, "num_chars": 2}, {"sum_logits": -1.2610435485839844, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.2610435485839844, "logits_per_char": -0.6305217742919922, "bits_per_byte": 0.909650636944221, "num_chars": 2}, {"sum_logits": -1.5467948913574219, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.5467948913574219, "logits_per_char": -0.7733974456787109, "bits_per_byte": 1.1157766595176692, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 402, "native_id": "Mercury_7218628", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4812226295471191, "logits_per_token_corr": -1.4812226295471191, "logits_per_char_corr": -0.7406113147735596, "bits_per_byte_corr": 1.068476271050837}, "model_output": [{"sum_logits": -1.2166104316711426, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.2166104316711426, "logits_per_char": -0.6083052158355713, "bits_per_byte": 0.8775989182334998, "num_chars": 2}, {"sum_logits": -1.0490670204162598, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": true, "logits_per_token": -1.0490670204162598, "logits_per_char": -0.5245335102081299, "bits_per_byte": 0.7567418939578932, "num_chars": 2}, {"sum_logits": -1.4812226295471191, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.4812226295471191, "logits_per_char": -0.7406113147735596, "bits_per_byte": 1.068476271050837, "num_chars": 2}, {"sum_logits": -2.1758618354797363, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -2.1758618354797363, "logits_per_char": -1.0879309177398682, "bits_per_byte": 1.5695525398542143, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 403, "native_id": "MSA_2013_5_23", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.637519359588623, "logits_per_token_corr": -1.637519359588623, "logits_per_char_corr": -0.8187596797943115, "bits_per_byte_corr": 1.1812205297199374}, "model_output": [{"sum_logits": -1.5030913352966309, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.5030913352966309, "logits_per_char": -0.7515456676483154, "bits_per_byte": 1.0842512077185886, "num_chars": 2}, {"sum_logits": -1.259315013885498, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.259315013885498, "logits_per_char": -0.629657506942749, "bits_per_byte": 0.9084037627254647, "num_chars": 2}, {"sum_logits": -1.2462897300720215, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.2462897300720215, "logits_per_char": -0.6231448650360107, "bits_per_byte": 0.899008006543521, "num_chars": 2}, {"sum_logits": -1.637519359588623, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.637519359588623, "logits_per_char": -0.8187596797943115, "bits_per_byte": 1.1812205297199374, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 404, "native_id": "Mercury_7081725", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5082927942276, "logits_per_token_corr": -1.5082927942276, "logits_per_char_corr": -0.7541463971138, "bits_per_byte_corr": 1.08800326722114}, "model_output": [{"sum_logits": -1.5082927942276, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.5082927942276, "logits_per_char": -0.7541463971138, "bits_per_byte": 1.08800326722114, "num_chars": 2}, {"sum_logits": -1.2223316431045532, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.2223316431045532, "logits_per_char": -0.6111658215522766, "bits_per_byte": 0.8817258999149321, "num_chars": 2}, {"sum_logits": -1.3855472803115845, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.3855472803115845, "logits_per_char": -0.6927736401557922, "bits_per_byte": 0.9994610951120747, "num_chars": 2}, {"sum_logits": -1.5240436792373657, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.5240436792373657, "logits_per_char": -0.7620218396186829, "bits_per_byte": 1.0993651290677486, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 405, "native_id": "Mercury_SC_413542", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.505497694015503, "logits_per_token_corr": -1.505497694015503, "logits_per_char_corr": -0.7527488470077515, "bits_per_byte_corr": 1.0859870286137483}, "model_output": [{"sum_logits": -1.505497694015503, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.505497694015503, "logits_per_char": -0.7527488470077515, "bits_per_byte": 1.0859870286137483, "num_chars": 2}, {"sum_logits": -1.1730296611785889, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.1730296611785889, "logits_per_char": -0.5865148305892944, "bits_per_byte": 0.8461620374996135, "num_chars": 2}, {"sum_logits": -1.2637851238250732, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.2637851238250732, "logits_per_char": -0.6318925619125366, "bits_per_byte": 0.9116282654464938, "num_chars": 2}, {"sum_logits": -1.7511012554168701, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.7511012554168701, "logits_per_char": -0.8755506277084351, "bits_per_byte": 1.2631525486430857, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 406, "native_id": "Mercury_SC_407302", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5052578449249268, "logits_per_token_corr": -1.5052578449249268, "logits_per_char_corr": -0.7526289224624634, "bits_per_byte_corr": 1.08581401406698}, "model_output": [{"sum_logits": -1.5372917652130127, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.5372917652130127, "logits_per_char": -0.7686458826065063, "bits_per_byte": 1.1089216030369238, "num_chars": 2}, {"sum_logits": -1.0433285236358643, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.0433285236358643, "logits_per_char": -0.5216642618179321, "bits_per_byte": 0.7526024435342732, "num_chars": 2}, {"sum_logits": -1.5052578449249268, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.5052578449249268, "logits_per_char": -0.7526289224624634, "bits_per_byte": 1.08581401406698, "num_chars": 2}, {"sum_logits": -1.6263859272003174, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.6263859272003174, "logits_per_char": -0.8131929636001587, "bits_per_byte": 1.173189455872591, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 407, "native_id": "Mercury_175053", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.0891032218933105, "logits_per_token_corr": -2.0891032218933105, "logits_per_char_corr": -1.0445516109466553, "bits_per_byte_corr": 1.5069694290664002}, "model_output": [{"sum_logits": -1.1708999872207642, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.1708999872207642, "logits_per_char": -0.5854499936103821, "bits_per_byte": 0.8446258024707803, "num_chars": 2}, {"sum_logits": -1.1592994928359985, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.1592994928359985, "logits_per_char": -0.5796497464179993, "bits_per_byte": 0.8362578146103935, "num_chars": 2}, {"sum_logits": -1.4529377222061157, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.4529377222061157, "logits_per_char": -0.7264688611030579, "bits_per_byte": 1.0480730232743878, "num_chars": 2}, {"sum_logits": -2.0891032218933105, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -2.0891032218933105, "logits_per_char": -1.0445516109466553, "bits_per_byte": 1.5069694290664002, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 408, "native_id": "Mercury_7161315", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2049953937530518, "logits_per_token_corr": -1.2049953937530518, "logits_per_char_corr": -0.6024976968765259, "bits_per_byte_corr": 0.8692204394314103}, "model_output": [{"sum_logits": -1.4007771015167236, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.4007771015167236, "logits_per_char": -0.7003885507583618, "bits_per_byte": 1.0104470888752226, "num_chars": 2}, {"sum_logits": -1.2049953937530518, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": true, "logits_per_token": -1.2049953937530518, "logits_per_char": -0.6024976968765259, "bits_per_byte": 0.8692204394314103, "num_chars": 2}, {"sum_logits": -1.3685905933380127, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.3685905933380127, "logits_per_char": -0.6842952966690063, "bits_per_byte": 0.9872294310087267, "num_chars": 2}, {"sum_logits": -1.7311699390411377, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.7311699390411377, "logits_per_char": -0.8655849695205688, "bits_per_byte": 1.2487751429962464, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 409, "native_id": "Mercury_189070", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5918247699737549, "logits_per_token_corr": -1.5918247699737549, "logits_per_char_corr": -0.7959123849868774, "bits_per_byte_corr": 1.1482588508035005}, "model_output": [{"sum_logits": -1.5918247699737549, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.5918247699737549, "logits_per_char": -0.7959123849868774, "bits_per_byte": 1.1482588508035005, "num_chars": 2}, {"sum_logits": -1.1225578784942627, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.1225578784942627, "logits_per_char": -0.5612789392471313, "bits_per_byte": 0.8097543422078359, "num_chars": 2}, {"sum_logits": -1.2709476947784424, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.2709476947784424, "logits_per_char": -0.6354738473892212, "bits_per_byte": 0.916794968243718, "num_chars": 2}, {"sum_logits": -1.7521107196807861, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.7521107196807861, "logits_per_char": -0.8760553598403931, "bits_per_byte": 1.2638807231868394, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 410, "native_id": "Mercury_7189123", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.097670078277588, "logits_per_token_corr": -1.097670078277588, "logits_per_char_corr": -0.548835039138794, "bits_per_byte_corr": 0.791801589232207}, "model_output": [{"sum_logits": -1.4063363075256348, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.4063363075256348, "logits_per_char": -0.7031681537628174, "bits_per_byte": 1.0144572083453935, "num_chars": 2}, {"sum_logits": -1.097670078277588, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": true, "logits_per_token": -1.097670078277588, "logits_per_char": -0.548835039138794, "bits_per_byte": 0.791801589232207, "num_chars": 2}, {"sum_logits": -1.2713665962219238, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.2713665962219238, "logits_per_char": -0.6356832981109619, "bits_per_byte": 0.9170971417612842, "num_chars": 2}, {"sum_logits": -2.01591157913208, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -2.01591157913208, "logits_per_char": -1.00795578956604, "bits_per_byte": 1.4541728190432903, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 411, "native_id": "Mercury_SC_402171", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7123022079467773, "logits_per_token_corr": -1.7123022079467773, "logits_per_char_corr": -0.8561511039733887, "bits_per_byte_corr": 1.2351649519549066}, "model_output": [{"sum_logits": -1.4956140518188477, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4956140518188477, "logits_per_char": -0.7478070259094238, "bits_per_byte": 1.0788574878222255, "num_chars": 2}, {"sum_logits": -1.2482023239135742, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.2482023239135742, "logits_per_char": -0.6241011619567871, "bits_per_byte": 0.9003876513687434, "num_chars": 2}, {"sum_logits": -1.233708381652832, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.233708381652832, "logits_per_char": -0.616854190826416, "bits_per_byte": 0.8899324820574841, "num_chars": 2}, {"sum_logits": -1.7123022079467773, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.7123022079467773, "logits_per_char": -0.8561511039733887, "bits_per_byte": 1.2351649519549066, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 412, "native_id": "Mercury_7217368", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5835261344909668, "logits_per_token_corr": -1.5835261344909668, "logits_per_char_corr": -0.7917630672454834, "bits_per_byte_corr": 1.1422726506749143}, "model_output": [{"sum_logits": -1.528348445892334, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.528348445892334, "logits_per_char": -0.764174222946167, "bits_per_byte": 1.1024703618204044, "num_chars": 2}, {"sum_logits": -1.5835261344909668, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.5835261344909668, "logits_per_char": -0.7917630672454834, "bits_per_byte": 1.1422726506749143, "num_chars": 2}, {"sum_logits": -1.0684103965759277, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.0684103965759277, "logits_per_char": -0.5342051982879639, "bits_per_byte": 0.7706951903877045, "num_chars": 2}, {"sum_logits": -1.5222201347351074, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.5222201347351074, "logits_per_char": -0.7611100673675537, "bits_per_byte": 1.0980497197626236, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 413, "native_id": "Mercury_LBS10933", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8398518562316895, "logits_per_token_corr": -1.8398518562316895, "logits_per_char_corr": -0.9199259281158447, "bits_per_byte_corr": 1.3271725744788596}, "model_output": [{"sum_logits": -1.8398518562316895, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.8398518562316895, "logits_per_char": -0.9199259281158447, "bits_per_byte": 1.3271725744788596, "num_chars": 2}, {"sum_logits": -1.5370469093322754, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.5370469093322754, "logits_per_char": -0.7685234546661377, "bits_per_byte": 1.1087449768544875, "num_chars": 2}, {"sum_logits": -1.2806496620178223, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.2806496620178223, "logits_per_char": -0.6403248310089111, "bits_per_byte": 0.9237934582552834, "num_chars": 2}, {"sum_logits": -1.4316163063049316, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.4316163063049316, "logits_per_char": -0.7158081531524658, "bits_per_byte": 1.032692872781692, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 414, "native_id": "Mercury_7223160", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.361649513244629, "logits_per_token_corr": -1.361649513244629, "logits_per_char_corr": -0.6808247566223145, "bits_per_byte_corr": 0.9822225000941541}, "model_output": [{"sum_logits": -1.300929069519043, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.300929069519043, "logits_per_char": -0.6504645347595215, "bits_per_byte": 0.9384219585723826, "num_chars": 2}, {"sum_logits": -1.2250394821166992, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.2250394821166992, "logits_per_char": -0.6125197410583496, "bits_per_byte": 0.8836791928721078, "num_chars": 2}, {"sum_logits": -1.361649513244629, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.361649513244629, "logits_per_char": -0.6808247566223145, "bits_per_byte": 0.9822225000941541, "num_chars": 2}, {"sum_logits": -1.8663549423217773, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.8663549423217773, "logits_per_char": -0.9331774711608887, "bits_per_byte": 1.346290509914085, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 415, "native_id": "Mercury_SC_401324", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7510149478912354, "logits_per_token_corr": -1.7510149478912354, "logits_per_char_corr": -0.8755074739456177, "bits_per_byte_corr": 1.2630902909234734}, "model_output": [{"sum_logits": -1.3914892673492432, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.3914892673492432, "logits_per_char": -0.6957446336746216, "bits_per_byte": 1.0037473327282063, "num_chars": 2}, {"sum_logits": -1.2261087894439697, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.2261087894439697, "logits_per_char": -0.6130543947219849, "bits_per_byte": 0.8844505350612281, "num_chars": 2}, {"sum_logits": -1.2991869449615479, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.2991869449615479, "logits_per_char": -0.6495934724807739, "bits_per_byte": 0.9371652813425272, "num_chars": 2}, {"sum_logits": -1.7510149478912354, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.7510149478912354, "logits_per_char": -0.8755074739456177, "bits_per_byte": 1.2630902909234734, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 416, "native_id": "LEAP_2001_8_10379", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.363741159439087, "logits_per_token_corr": -1.363741159439087, "logits_per_char_corr": -0.6818705797195435, "bits_per_byte_corr": 0.9837313038901747}, "model_output": [{"sum_logits": -1.363741159439087, "num_tokens": 1, "num_tokens_all": 466, "is_greedy": false, "logits_per_token": -1.363741159439087, "logits_per_char": -0.6818705797195435, "bits_per_byte": 0.9837313038901747, "num_chars": 2}, {"sum_logits": -1.0121843814849854, "num_tokens": 1, "num_tokens_all": 466, "is_greedy": true, "logits_per_token": -1.0121843814849854, "logits_per_char": -0.5060921907424927, "bits_per_byte": 0.7301366938173501, "num_chars": 2}, {"sum_logits": -1.4901392459869385, "num_tokens": 1, "num_tokens_all": 466, "is_greedy": false, "logits_per_token": -1.4901392459869385, "logits_per_char": -0.7450696229934692, "bits_per_byte": 1.0749082502104599, "num_chars": 2}, {"sum_logits": -1.9363882541656494, "num_tokens": 1, "num_tokens_all": 466, "is_greedy": false, "logits_per_token": -1.9363882541656494, "logits_per_char": -0.9681941270828247, "bits_per_byte": 1.3968088657612134, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 417, "native_id": "VASoL_2009_5_30", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9977469444274902, "logits_per_token_corr": -1.9977469444274902, "logits_per_char_corr": -0.9988734722137451, "bits_per_byte_corr": 1.441069804839345}, "model_output": [{"sum_logits": -1.0284419059753418, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.0284419059753418, "logits_per_char": -0.5142209529876709, "bits_per_byte": 0.7418640187970426, "num_chars": 2}, {"sum_logits": -1.2671160697937012, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.2671160697937012, "logits_per_char": -0.6335580348968506, "bits_per_byte": 0.9140310350616998, "num_chars": 2}, {"sum_logits": -1.5366969108581543, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.5366969108581543, "logits_per_char": -0.7683484554290771, "bits_per_byte": 1.1084925063230207, "num_chars": 2}, {"sum_logits": -1.9977469444274902, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.9977469444274902, "logits_per_char": -0.9988734722137451, "bits_per_byte": 1.441069804839345, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 418, "native_id": "Mercury_416404", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.190258026123047, "logits_per_token_corr": -2.190258026123047, "logits_per_char_corr": -1.0951290130615234, "bits_per_byte_corr": 1.5799371962786197}, "model_output": [{"sum_logits": -1.0575848817825317, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.0575848817825317, "logits_per_char": -0.5287924408912659, "bits_per_byte": 0.7628862321339477, "num_chars": 2}, {"sum_logits": -1.1368333101272583, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.1368333101272583, "logits_per_char": -0.5684166550636292, "bits_per_byte": 0.8200518894195794, "num_chars": 2}, {"sum_logits": -1.5565167665481567, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.5565167665481567, "logits_per_char": -0.7782583832740784, "bits_per_byte": 1.1227895100805818, "num_chars": 2}, {"sum_logits": -2.190258026123047, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -2.190258026123047, "logits_per_char": -1.0951290130615234, "bits_per_byte": 1.5799371962786197, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 419, "native_id": "Mercury_7103530", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2200658321380615, "logits_per_token_corr": -1.2200658321380615, "logits_per_char_corr": -0.6100329160690308, "bits_per_byte_corr": 0.8800914627924562}, "model_output": [{"sum_logits": -1.434659719467163, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.434659719467163, "logits_per_char": -0.7173298597335815, "bits_per_byte": 1.0348882313199572, "num_chars": 2}, {"sum_logits": -1.2200658321380615, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.2200658321380615, "logits_per_char": -0.6100329160690308, "bits_per_byte": 0.8800914627924562, "num_chars": 2}, {"sum_logits": -1.2259557247161865, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.2259557247161865, "logits_per_char": -0.6129778623580933, "bits_per_byte": 0.8843401221993741, "num_chars": 2}, {"sum_logits": -1.8876664638519287, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.8876664638519287, "logits_per_char": -0.9438332319259644, "bits_per_byte": 1.36166352312677, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 420, "native_id": "Mercury_7030870", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3574351072311401, "logits_per_token_corr": -1.3574351072311401, "logits_per_char_corr": -0.6787175536155701, "bits_per_byte_corr": 0.9791824487661756}, "model_output": [{"sum_logits": -1.3574351072311401, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3574351072311401, "logits_per_char": -0.6787175536155701, "bits_per_byte": 0.9791824487661756, "num_chars": 2}, {"sum_logits": -1.1968401670455933, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.1968401670455933, "logits_per_char": -0.5984200835227966, "bits_per_byte": 0.8633376868673183, "num_chars": 2}, {"sum_logits": -1.4589251279830933, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4589251279830933, "logits_per_char": -0.7294625639915466, "bits_per_byte": 1.0523920235855084, "num_chars": 2}, {"sum_logits": -1.6611822843551636, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.6611822843551636, "logits_per_char": -0.8305911421775818, "bits_per_byte": 1.198289721826758, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 421, "native_id": "LEAP__7_10348", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1935218572616577, "logits_per_token_corr": -1.1935218572616577, "logits_per_char_corr": -0.5967609286308289, "bits_per_byte_corr": 0.860944032332608}, "model_output": [{"sum_logits": -1.6929134130477905, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.6929134130477905, "logits_per_char": -0.8464567065238953, "bits_per_byte": 1.2211788928301057, "num_chars": 2}, {"sum_logits": -1.0527862310409546, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": true, "logits_per_token": -1.0527862310409546, "logits_per_char": -0.5263931155204773, "bits_per_byte": 0.7594247373200295, "num_chars": 2}, {"sum_logits": -1.1935218572616577, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.1935218572616577, "logits_per_char": -0.5967609286308289, "bits_per_byte": 0.860944032332608, "num_chars": 2}, {"sum_logits": -1.9011768102645874, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.9011768102645874, "logits_per_char": -0.9505884051322937, "bits_per_byte": 1.3714091780118942, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 422, "native_id": "Mercury_SC_406835", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1221423149108887, "logits_per_token_corr": -1.1221423149108887, "logits_per_char_corr": -0.5610711574554443, "bits_per_byte_corr": 0.8094545764473818}, "model_output": [{"sum_logits": -1.0817465782165527, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": true, "logits_per_token": -1.0817465782165527, "logits_per_char": -0.5408732891082764, "bits_per_byte": 0.7803152119463735, "num_chars": 2}, {"sum_logits": -1.1221423149108887, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.1221423149108887, "logits_per_char": -0.5610711574554443, "bits_per_byte": 0.8094545764473818, "num_chars": 2}, {"sum_logits": -1.5449137687683105, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.5449137687683105, "logits_per_char": -0.7724568843841553, "bits_per_byte": 1.1144197164023608, "num_chars": 2}, {"sum_logits": -2.237853527069092, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -2.237853527069092, "logits_per_char": -1.118926763534546, "bits_per_byte": 1.614270092870387, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 423, "native_id": "Mercury_178255", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.026628851890564, "logits_per_token_corr": -1.026628851890564, "logits_per_char_corr": -0.513314425945282, "bits_per_byte_corr": 0.7405561767285554}, "model_output": [{"sum_logits": -1.1994060277938843, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.1994060277938843, "logits_per_char": -0.5997030138969421, "bits_per_byte": 0.8651885641559051, "num_chars": 2}, {"sum_logits": -1.026628851890564, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": true, "logits_per_token": -1.026628851890564, "logits_per_char": -0.513314425945282, "bits_per_byte": 0.7405561767285554, "num_chars": 2}, {"sum_logits": -1.7866328954696655, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.7866328954696655, "logits_per_char": -0.8933164477348328, "bits_per_byte": 1.288783209092514, "num_chars": 2}, {"sum_logits": -1.8410457372665405, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.8410457372665405, "logits_per_char": -0.9205228686332703, "bits_per_byte": 1.3280337776030557, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 424, "native_id": "MDSA_2012_8_16", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.9093629717826843, "logits_per_token_corr": -0.9093629717826843, "logits_per_char_corr": -0.45468148589134216, "bits_per_byte_corr": 0.6559667248799358}, "model_output": [{"sum_logits": -1.446279764175415, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.446279764175415, "logits_per_char": -0.7231398820877075, "bits_per_byte": 1.0432703217577148, "num_chars": 2}, {"sum_logits": -0.9093629717826843, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -0.9093629717826843, "logits_per_char": -0.45468148589134216, "bits_per_byte": 0.6559667248799358, "num_chars": 2}, {"sum_logits": -1.5846083164215088, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.5846083164215088, "logits_per_char": -0.7923041582107544, "bits_per_byte": 1.1430532799271813, "num_chars": 2}, {"sum_logits": -1.9103853702545166, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.9103853702545166, "logits_per_char": -0.9551926851272583, "bits_per_byte": 1.3780517499274987, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 425, "native_id": "Mercury_409645", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.581769585609436, "logits_per_token_corr": -1.581769585609436, "logits_per_char_corr": -0.790884792804718, "bits_per_byte_corr": 1.1410055684946818}, "model_output": [{"sum_logits": -1.431439995765686, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.431439995765686, "logits_per_char": -0.715719997882843, "bits_per_byte": 1.0325656916113788, "num_chars": 2}, {"sum_logits": -1.4206024408340454, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4206024408340454, "logits_per_char": -0.7103012204170227, "bits_per_byte": 1.0247480482337534, "num_chars": 2}, {"sum_logits": -1.2235103845596313, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -1.2235103845596313, "logits_per_char": -0.6117551922798157, "bits_per_byte": 0.8825761821407985, "num_chars": 2}, {"sum_logits": -1.581769585609436, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.581769585609436, "logits_per_char": -0.790884792804718, "bits_per_byte": 1.1410055684946818, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 426, "native_id": "TIMSS_2003_8_pg47", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4204572439193726, "logits_per_token_corr": -1.4204572439193726, "logits_per_char_corr": -0.7102286219596863, "bits_per_byte_corr": 1.024643310799378}, "model_output": [{"sum_logits": -1.3307126760482788, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3307126760482788, "logits_per_char": -0.6653563380241394, "bits_per_byte": 0.9599062892921564, "num_chars": 2}, {"sum_logits": -1.105552077293396, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.105552077293396, "logits_per_char": -0.552776038646698, "bits_per_byte": 0.7974872496784101, "num_chars": 2}, {"sum_logits": -1.4204572439193726, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4204572439193726, "logits_per_char": -0.7102286219596863, "bits_per_byte": 1.024643310799378, "num_chars": 2}, {"sum_logits": -1.9483407735824585, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.9483407735824585, "logits_per_char": -0.9741703867912292, "bits_per_byte": 1.4054307860055995, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 427, "native_id": "NYSEDREGENTS_2010_8_16", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4830412864685059, "logits_per_token_corr": -1.4830412864685059, "logits_per_char_corr": -0.7415206432342529, "bits_per_byte_corr": 1.0697881547116195}, "model_output": [{"sum_logits": -1.1788897514343262, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.1788897514343262, "logits_per_char": -0.5894448757171631, "bits_per_byte": 0.8503891990751735, "num_chars": 2}, {"sum_logits": -1.4830412864685059, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4830412864685059, "logits_per_char": -0.7415206432342529, "bits_per_byte": 1.0697881547116195, "num_chars": 2}, {"sum_logits": -1.4457736015319824, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4457736015319824, "logits_per_char": -0.7228868007659912, "bits_per_byte": 1.0429052025899328, "num_chars": 2}, {"sum_logits": -1.509345531463623, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.509345531463623, "logits_per_char": -0.7546727657318115, "bits_per_byte": 1.0887626566160251, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 428, "native_id": "Mercury_7159810", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0128768682479858, "logits_per_token_corr": -1.0128768682479858, "logits_per_char_corr": -0.5064384341239929, "bits_per_byte_corr": 0.7306362174267815}, "model_output": [{"sum_logits": -1.6083225011825562, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.6083225011825562, "logits_per_char": -0.8041612505912781, "bits_per_byte": 1.1601594483039375, "num_chars": 2}, {"sum_logits": -1.0128768682479858, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": true, "logits_per_token": -1.0128768682479858, "logits_per_char": -0.5064384341239929, "bits_per_byte": 0.7306362174267815, "num_chars": 2}, {"sum_logits": -1.3836711645126343, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.3836711645126343, "logits_per_char": -0.6918355822563171, "bits_per_byte": 0.9981077636324344, "num_chars": 2}, {"sum_logits": -1.7758067846298218, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.7758067846298218, "logits_per_char": -0.8879033923149109, "bits_per_byte": 1.28097382088213, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 429, "native_id": "Mercury_7267523", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.28597092628479, "logits_per_token_corr": -1.28597092628479, "logits_per_char_corr": -0.642985463142395, "bits_per_byte_corr": 0.927631939039893}, "model_output": [{"sum_logits": -1.2745726108551025, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.2745726108551025, "logits_per_char": -0.6372863054275513, "bits_per_byte": 0.919409792467438, "num_chars": 2}, {"sum_logits": -1.28597092628479, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.28597092628479, "logits_per_char": -0.642985463142395, "bits_per_byte": 0.927631939039893, "num_chars": 2}, {"sum_logits": -1.3708040714263916, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3708040714263916, "logits_per_char": -0.6854020357131958, "bits_per_byte": 0.9888261179393382, "num_chars": 2}, {"sum_logits": -1.828462839126587, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.828462839126587, "logits_per_char": -0.9142314195632935, "bits_per_byte": 1.3189571352297882, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 430, "native_id": "Mercury_SC_401006", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1487287282943726, "logits_per_token_corr": -1.1487287282943726, "logits_per_char_corr": -0.5743643641471863, "bits_per_byte_corr": 0.8286326198190836}, "model_output": [{"sum_logits": -1.1487287282943726, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.1487287282943726, "logits_per_char": -0.5743643641471863, "bits_per_byte": 0.8286326198190836, "num_chars": 2}, {"sum_logits": -1.2672532796859741, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.2672532796859741, "logits_per_char": -0.6336266398429871, "bits_per_byte": 0.9141300110772714, "num_chars": 2}, {"sum_logits": -1.3821157217025757, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3821157217025757, "logits_per_char": -0.6910578608512878, "bits_per_byte": 0.9969857488182046, "num_chars": 2}, {"sum_logits": -2.0603060722351074, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -2.0603060722351074, "logits_per_char": -1.0301530361175537, "bits_per_byte": 1.4861966765645718, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 431, "native_id": "ACTAAP_2010_7_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.03737211227417, "logits_per_token_corr": -2.03737211227417, "logits_per_char_corr": -1.018686056137085, "bits_per_byte_corr": 1.4696533214127645}, "model_output": [{"sum_logits": -1.3011735677719116, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -1.3011735677719116, "logits_per_char": -0.6505867838859558, "bits_per_byte": 0.9385983267808425, "num_chars": 2}, {"sum_logits": -1.158631682395935, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": true, "logits_per_token": -1.158631682395935, "logits_per_char": -0.5793158411979675, "bits_per_byte": 0.8357760912053265, "num_chars": 2}, {"sum_logits": -1.3253892660140991, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -1.3253892660140991, "logits_per_char": -0.6626946330070496, "bits_per_byte": 0.9560662606636888, "num_chars": 2}, {"sum_logits": -2.03737211227417, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -2.03737211227417, "logits_per_char": -1.018686056137085, "bits_per_byte": 1.4696533214127645, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 432, "native_id": "MEAP_2005_8_13", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9232544898986816, "logits_per_token_corr": -1.9232544898986816, "logits_per_char_corr": -0.9616272449493408, "bits_per_byte_corr": 1.3873348574731272}, "model_output": [{"sum_logits": -1.252026081085205, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.252026081085205, "logits_per_char": -0.6260130405426025, "bits_per_byte": 0.9031459091232831, "num_chars": 2}, {"sum_logits": -1.2040200233459473, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.2040200233459473, "logits_per_char": -0.6020100116729736, "bits_per_byte": 0.86851685840673, "num_chars": 2}, {"sum_logits": -1.3696770668029785, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3696770668029785, "logits_per_char": -0.6848385334014893, "bits_per_byte": 0.9880131559487091, "num_chars": 2}, {"sum_logits": -1.9232544898986816, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.9232544898986816, "logits_per_char": -0.9616272449493408, "bits_per_byte": 1.3873348574731272, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 433, "native_id": "Mercury_7164623", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.303775668144226, "logits_per_token_corr": -1.303775668144226, "logits_per_char_corr": -0.651887834072113, "bits_per_byte_corr": 0.9404753454323606}, "model_output": [{"sum_logits": -1.2973765134811401, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.2973765134811401, "logits_per_char": -0.6486882567405701, "bits_per_byte": 0.9358593310831995, "num_chars": 2}, {"sum_logits": -1.431573748588562, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.431573748588562, "logits_per_char": -0.715786874294281, "bits_per_byte": 1.032662173878513, "num_chars": 2}, {"sum_logits": -1.303775668144226, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.303775668144226, "logits_per_char": -0.651887834072113, "bits_per_byte": 0.9404753454323606, "num_chars": 2}, {"sum_logits": -1.6598490476608276, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.6598490476608276, "logits_per_char": -0.8299245238304138, "bits_per_byte": 1.1973279948431326, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 434, "native_id": "Mercury_417127", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.262804388999939, "logits_per_token_corr": -1.262804388999939, "logits_per_char_corr": -0.6314021944999695, "bits_per_byte_corr": 0.9109208148121691}, "model_output": [{"sum_logits": -1.262804388999939, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": false, "logits_per_token": -1.262804388999939, "logits_per_char": -0.6314021944999695, "bits_per_byte": 0.9109208148121691, "num_chars": 2}, {"sum_logits": -1.1726630926132202, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": true, "logits_per_token": -1.1726630926132202, "logits_per_char": -0.5863315463066101, "bits_per_byte": 0.8458976141739117, "num_chars": 2}, {"sum_logits": -1.3223003149032593, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": false, "logits_per_token": -1.3223003149032593, "logits_per_char": -0.6611501574516296, "bits_per_byte": 0.9538380534391087, "num_chars": 2}, {"sum_logits": -2.099794864654541, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": false, "logits_per_token": -2.099794864654541, "logits_per_char": -1.0498974323272705, "bits_per_byte": 1.5146818190616975, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 435, "native_id": "Mercury_411224", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2797234058380127, "logits_per_token_corr": -1.2797234058380127, "logits_per_char_corr": -0.6398617029190063, "bits_per_byte_corr": 0.9231253056566808}, "model_output": [{"sum_logits": -1.6752312183380127, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": false, "logits_per_token": -1.6752312183380127, "logits_per_char": -0.8376156091690063, "bits_per_byte": 1.2084238855201819, "num_chars": 2}, {"sum_logits": -1.2797234058380127, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": false, "logits_per_token": -1.2797234058380127, "logits_per_char": -0.6398617029190063, "bits_per_byte": 0.9231253056566808, "num_chars": 2}, {"sum_logits": -1.1944248676300049, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": true, "logits_per_token": -1.1944248676300049, "logits_per_char": -0.5972124338150024, "bits_per_byte": 0.8615954166227513, "num_chars": 2}, {"sum_logits": -1.6822845935821533, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": false, "logits_per_token": -1.6822845935821533, "logits_per_char": -0.8411422967910767, "bits_per_byte": 1.2135118202633108, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 436, "native_id": "TIMSS_2011_8_pg15", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7962952852249146, "logits_per_token_corr": -1.7962952852249146, "logits_per_char_corr": -0.8981476426124573, "bits_per_byte_corr": 1.295753149984036}, "model_output": [{"sum_logits": -1.5271645784378052, "num_tokens": 1, "num_tokens_all": 442, "is_greedy": false, "logits_per_token": -1.5271645784378052, "logits_per_char": -0.7635822892189026, "bits_per_byte": 1.1016163819675444, "num_chars": 2}, {"sum_logits": -1.1784955263137817, "num_tokens": 1, "num_tokens_all": 442, "is_greedy": true, "logits_per_token": -1.1784955263137817, "logits_per_char": -0.5892477631568909, "bits_per_byte": 0.8501048257619717, "num_chars": 2}, {"sum_logits": -1.2272645235061646, "num_tokens": 1, "num_tokens_all": 442, "is_greedy": false, "logits_per_token": -1.2272645235061646, "logits_per_char": -0.6136322617530823, "bits_per_byte": 0.8852842209612861, "num_chars": 2}, {"sum_logits": -1.7962952852249146, "num_tokens": 1, "num_tokens_all": 442, "is_greedy": false, "logits_per_token": -1.7962952852249146, "logits_per_char": -0.8981476426124573, "bits_per_byte": 1.295753149984036, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 437, "native_id": "NYSEDREGENTS_2012_8_19", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2289903163909912, "logits_per_token_corr": -1.2289903163909912, "logits_per_char_corr": -0.6144951581954956, "bits_per_byte_corr": 0.8865291173795575}, "model_output": [{"sum_logits": -1.56894850730896, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.56894850730896, "logits_per_char": -0.78447425365448, "bits_per_byte": 1.1317571154532022, "num_chars": 2}, {"sum_logits": -1.3337743282318115, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3337743282318115, "logits_per_char": -0.6668871641159058, "bits_per_byte": 0.9621148045032127, "num_chars": 2}, {"sum_logits": -1.2289903163909912, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.2289903163909912, "logits_per_char": -0.6144951581954956, "bits_per_byte": 0.8865291173795575, "num_chars": 2}, {"sum_logits": -1.497894048690796, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.497894048690796, "logits_per_char": -0.748947024345398, "bits_per_byte": 1.0805021579124277, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 438, "native_id": "Mercury_7222460", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4182275533676147, "logits_per_token_corr": -1.4182275533676147, "logits_per_char_corr": -0.7091137766838074, "bits_per_byte_corr": 1.0230349290485077}, "model_output": [{"sum_logits": -1.4182275533676147, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.4182275533676147, "logits_per_char": -0.7091137766838074, "bits_per_byte": 1.0230349290485077, "num_chars": 2}, {"sum_logits": -1.2216752767562866, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": true, "logits_per_token": -1.2216752767562866, "logits_per_char": -0.6108376383781433, "bits_per_byte": 0.8812524316771064, "num_chars": 2}, {"sum_logits": -1.309338927268982, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.309338927268982, "logits_per_char": -0.654669463634491, "bits_per_byte": 0.9444883886075962, "num_chars": 2}, {"sum_logits": -1.7305558919906616, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.7305558919906616, "logits_per_char": -0.8652779459953308, "bits_per_byte": 1.2483322016789489, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 439, "native_id": "Mercury_7007420", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2345696687698364, "logits_per_token_corr": -1.2345696687698364, "logits_per_char_corr": -0.6172848343849182, "bits_per_byte_corr": 0.8905537693837264}, "model_output": [{"sum_logits": -1.4575406312942505, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.4575406312942505, "logits_per_char": -0.7287703156471252, "bits_per_byte": 1.0513933203319474, "num_chars": 2}, {"sum_logits": -1.3365670442581177, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.3365670442581177, "logits_per_char": -0.6682835221290588, "bits_per_byte": 0.9641293232840957, "num_chars": 2}, {"sum_logits": -1.2345696687698364, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.2345696687698364, "logits_per_char": -0.6172848343849182, "bits_per_byte": 0.8905537693837264, "num_chars": 2}, {"sum_logits": -1.5876485109329224, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.5876485109329224, "logits_per_char": -0.7938242554664612, "bits_per_byte": 1.14524631669966, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 440, "native_id": "Mercury_SC_405710", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7153339385986328, "logits_per_token_corr": -1.7153339385986328, "logits_per_char_corr": -0.8576669692993164, "bits_per_byte_corr": 1.2373518833432795}, "model_output": [{"sum_logits": -1.3951683044433594, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3951683044433594, "logits_per_char": -0.6975841522216797, "bits_per_byte": 1.0064011970136721, "num_chars": 2}, {"sum_logits": -1.1968421936035156, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.1968421936035156, "logits_per_char": -0.5984210968017578, "bits_per_byte": 0.8633391487198506, "num_chars": 2}, {"sum_logits": -1.348104476928711, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.348104476928711, "logits_per_char": -0.6740522384643555, "bits_per_byte": 0.9724518217333292, "num_chars": 2}, {"sum_logits": -1.7153339385986328, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.7153339385986328, "logits_per_char": -0.8576669692993164, "bits_per_byte": 1.2373518833432795, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 441, "native_id": "Mercury_SC_401375", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.269241452217102, "logits_per_token_corr": -1.269241452217102, "logits_per_char_corr": -0.634620726108551, "bits_per_byte_corr": 0.9155641744028175}, "model_output": [{"sum_logits": -1.491531491279602, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.491531491279602, "logits_per_char": -0.745765745639801, "bits_per_byte": 1.075912542900174, "num_chars": 2}, {"sum_logits": -1.5890694856643677, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.5890694856643677, "logits_per_char": -0.7945347428321838, "bits_per_byte": 1.146271333298803, "num_chars": 2}, {"sum_logits": -1.269241452217102, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.269241452217102, "logits_per_char": -0.634620726108551, "bits_per_byte": 0.9155641744028175, "num_chars": 2}, {"sum_logits": -1.314865231513977, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.314865231513977, "logits_per_char": -0.6574326157569885, "bits_per_byte": 0.9484747744719482, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 442, "native_id": "VASoL_2010_3_22", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4273678064346313, "logits_per_token_corr": -1.4273678064346313, "logits_per_char_corr": -0.7136839032173157, "bits_per_byte_corr": 1.02962822793464}, "model_output": [{"sum_logits": -1.4273678064346313, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4273678064346313, "logits_per_char": -0.7136839032173157, "bits_per_byte": 1.02962822793464, "num_chars": 2}, {"sum_logits": -1.1609207391738892, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.1609207391738892, "logits_per_char": -0.5804603695869446, "bits_per_byte": 0.8374272966362615, "num_chars": 2}, {"sum_logits": -1.3312546014785767, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3312546014785767, "logits_per_char": -0.6656273007392883, "bits_per_byte": 0.9602972058575678, "num_chars": 2}, {"sum_logits": -1.7860504388809204, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.7860504388809204, "logits_per_char": -0.8930252194404602, "bits_per_byte": 1.2883630554764558, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 443, "native_id": "Mercury_SC_408358", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1643157005310059, "logits_per_token_corr": -1.1643157005310059, "logits_per_char_corr": -0.5821578502655029, "bits_per_byte_corr": 0.8398762435932242}, "model_output": [{"sum_logits": -1.1643157005310059, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.1643157005310059, "logits_per_char": -0.5821578502655029, "bits_per_byte": 0.8398762435932242, "num_chars": 2}, {"sum_logits": -1.08443021774292, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.08443021774292, "logits_per_char": -0.54221510887146, "bits_per_byte": 0.7822510486644867, "num_chars": 2}, {"sum_logits": -1.4334654808044434, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4334654808044434, "logits_per_char": -0.7167327404022217, "bits_per_byte": 1.034026770221785, "num_chars": 2}, {"sum_logits": -2.2778048515319824, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -2.2778048515319824, "logits_per_char": -1.1389024257659912, "bits_per_byte": 1.6430888817101867, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 444, "native_id": "NYSEDREGENTS_2013_8_42", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.579119086265564, "logits_per_token_corr": -1.579119086265564, "logits_per_char_corr": -0.789559543132782, "bits_per_byte_corr": 1.1390936373650384}, "model_output": [{"sum_logits": -1.376186728477478, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.376186728477478, "logits_per_char": -0.688093364238739, "bits_per_byte": 0.9927088842565451, "num_chars": 2}, {"sum_logits": -1.579119086265564, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.579119086265564, "logits_per_char": -0.789559543132782, "bits_per_byte": 1.1390936373650384, "num_chars": 2}, {"sum_logits": -1.3032325506210327, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.3032325506210327, "logits_per_char": -0.6516162753105164, "bits_per_byte": 0.9400835689536948, "num_chars": 2}, {"sum_logits": -1.389898657798767, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.389898657798767, "logits_per_char": -0.6949493288993835, "bits_per_byte": 1.0025999504729741, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 445, "native_id": "Mercury_SC_400661", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.832129716873169, "logits_per_token_corr": -1.832129716873169, "logits_per_char_corr": -0.9160648584365845, "bits_per_byte_corr": 1.3216022284000601}, "model_output": [{"sum_logits": -1.3546650409698486, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3546650409698486, "logits_per_char": -0.6773325204849243, "bits_per_byte": 0.9771842683371246, "num_chars": 2}, {"sum_logits": -1.1813194751739502, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.1813194751739502, "logits_per_char": -0.5906597375869751, "bits_per_byte": 0.8521418742701177, "num_chars": 2}, {"sum_logits": -1.336686372756958, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.336686372756958, "logits_per_char": -0.668343186378479, "bits_per_byte": 0.9642154006008525, "num_chars": 2}, {"sum_logits": -1.832129716873169, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.832129716873169, "logits_per_char": -0.9160648584365845, "bits_per_byte": 1.3216022284000601, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 446, "native_id": "Mercury_SC_415422", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4438689947128296, "logits_per_token_corr": -1.4438689947128296, "logits_per_char_corr": -0.7219344973564148, "bits_per_byte_corr": 1.0415313191835143}, "model_output": [{"sum_logits": -1.2548202276229858, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.2548202276229858, "logits_per_char": -0.6274101138114929, "bits_per_byte": 0.9051614598000712, "num_chars": 2}, {"sum_logits": -0.9694350957870483, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -0.9694350957870483, "logits_per_char": -0.48471754789352417, "bits_per_byte": 0.6992996025783483, "num_chars": 2}, {"sum_logits": -1.4438689947128296, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4438689947128296, "logits_per_char": -0.7219344973564148, "bits_per_byte": 1.0415313191835143, "num_chars": 2}, {"sum_logits": -2.383934497833252, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -2.383934497833252, "logits_per_char": -1.191967248916626, "bits_per_byte": 1.7196452389153123, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 447, "native_id": "Mercury_SC_400162", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9859539270401, "logits_per_token_corr": -1.9859539270401, "logits_per_char_corr": -0.99297696352005, "bits_per_byte_corr": 1.4325629409883864}, "model_output": [{"sum_logits": -1.2635666131973267, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.2635666131973267, "logits_per_char": -0.6317833065986633, "bits_per_byte": 0.9114706433469779, "num_chars": 2}, {"sum_logits": -1.160333275794983, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.160333275794983, "logits_per_char": -0.5801666378974915, "bits_per_byte": 0.8370035313845352, "num_chars": 2}, {"sum_logits": -1.3790851831436157, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3790851831436157, "logits_per_char": -0.6895425915718079, "bits_per_byte": 0.9947996773430858, "num_chars": 2}, {"sum_logits": -1.9859539270401, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.9859539270401, "logits_per_char": -0.99297696352005, "bits_per_byte": 1.4325629409883864, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 448, "native_id": "Mercury_7212328", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.291038990020752, "logits_per_token_corr": -1.291038990020752, "logits_per_char_corr": -0.645519495010376, "bits_per_byte_corr": 0.9312877742492864}, "model_output": [{"sum_logits": -1.3502488136291504, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.3502488136291504, "logits_per_char": -0.6751244068145752, "bits_per_byte": 0.9739986336951905, "num_chars": 2}, {"sum_logits": -1.291038990020752, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": true, "logits_per_token": -1.291038990020752, "logits_per_char": -0.645519495010376, "bits_per_byte": 0.9312877742492864, "num_chars": 2}, {"sum_logits": -1.43986177444458, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.43986177444458, "logits_per_char": -0.71993088722229, "bits_per_byte": 1.0386407207791357, "num_chars": 2}, {"sum_logits": -1.5640969276428223, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.5640969276428223, "logits_per_char": -0.7820484638214111, "bits_per_byte": 1.1282574404907924, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 449, "native_id": "NCEOGA_2013_8_26", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9980108737945557, "logits_per_token_corr": -1.9980108737945557, "logits_per_char_corr": -0.9990054368972778, "bits_per_byte_corr": 1.4412601896338504}, "model_output": [{"sum_logits": -1.5596106052398682, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.5596106052398682, "logits_per_char": -0.7798053026199341, "bits_per_byte": 1.1250212429495046, "num_chars": 2}, {"sum_logits": -0.9565451145172119, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -0.9565451145172119, "logits_per_char": -0.47827255725860596, "bits_per_byte": 0.6900014465507693, "num_chars": 2}, {"sum_logits": -1.3359549045562744, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3359549045562744, "logits_per_char": -0.6679774522781372, "bits_per_byte": 0.9636877578280051, "num_chars": 2}, {"sum_logits": -1.9980108737945557, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.9980108737945557, "logits_per_char": -0.9990054368972778, "bits_per_byte": 1.4412601896338504, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 450, "native_id": "Mercury_SC_407696", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.261970043182373, "logits_per_token_corr": -1.261970043182373, "logits_per_char_corr": -0.6309850215911865, "bits_per_byte_corr": 0.9103189615254743}, "model_output": [{"sum_logits": -1.0729689598083496, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.0729689598083496, "logits_per_char": -0.5364844799041748, "bits_per_byte": 0.7739834986722038, "num_chars": 2}, {"sum_logits": -1.261970043182373, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.261970043182373, "logits_per_char": -0.6309850215911865, "bits_per_byte": 0.9103189615254743, "num_chars": 2}, {"sum_logits": -1.4437098503112793, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.4437098503112793, "logits_per_char": -0.7218549251556396, "bits_per_byte": 1.0414165207640633, "num_chars": 2}, {"sum_logits": -2.0764899253845215, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -2.0764899253845215, "logits_per_char": -1.0382449626922607, "bits_per_byte": 1.4978708589051475, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 451, "native_id": "Mercury_SC_400052", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2767393589019775, "logits_per_token_corr": -1.2767393589019775, "logits_per_char_corr": -0.6383696794509888, "bits_per_byte_corr": 0.9209727707984804}, "model_output": [{"sum_logits": -1.4125425815582275, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4125425815582275, "logits_per_char": -0.7062712907791138, "bits_per_byte": 1.0189340887300065, "num_chars": 2}, {"sum_logits": -1.2767393589019775, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.2767393589019775, "logits_per_char": -0.6383696794509888, "bits_per_byte": 0.9209727707984804, "num_chars": 2}, {"sum_logits": -1.2376196384429932, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.2376196384429932, "logits_per_char": -0.6188098192214966, "bits_per_byte": 0.8927538574448904, "num_chars": 2}, {"sum_logits": -1.729706048965454, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.729706048965454, "logits_per_char": -0.864853024482727, "bits_per_byte": 1.2477191695199479, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 452, "native_id": "Mercury_7212870", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6879241466522217, "logits_per_token_corr": -1.6879241466522217, "logits_per_char_corr": -0.8439620733261108, "bits_per_byte_corr": 1.2175798978868226}, "model_output": [{"sum_logits": -1.5384414196014404, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.5384414196014404, "logits_per_char": -0.7692207098007202, "bits_per_byte": 1.1097509033793849, "num_chars": 2}, {"sum_logits": -1.1413276195526123, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.1413276195526123, "logits_per_char": -0.5706638097763062, "bits_per_byte": 0.8232938483796711, "num_chars": 2}, {"sum_logits": -1.3397510051727295, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3397510051727295, "logits_per_char": -0.6698755025863647, "bits_per_byte": 0.9664260655950447, "num_chars": 2}, {"sum_logits": -1.6879241466522217, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.6879241466522217, "logits_per_char": -0.8439620733261108, "bits_per_byte": 1.2175798978868226, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 453, "native_id": "NYSEDREGENTS_2010_8_35", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5151933431625366, "logits_per_token_corr": -1.5151933431625366, "logits_per_char_corr": -0.7575966715812683, "bits_per_byte_corr": 1.0929809610850656}, "model_output": [{"sum_logits": -1.2264055013656616, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.2264055013656616, "logits_per_char": -0.6132027506828308, "bits_per_byte": 0.884664567470227, "num_chars": 2}, {"sum_logits": -1.220298171043396, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.220298171043396, "logits_per_char": -0.610149085521698, "bits_per_byte": 0.8802590598857222, "num_chars": 2}, {"sum_logits": -1.5151933431625366, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.5151933431625366, "logits_per_char": -0.7575966715812683, "bits_per_byte": 1.0929809610850656, "num_chars": 2}, {"sum_logits": -1.7207978963851929, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.7207978963851929, "logits_per_char": -0.8603989481925964, "bits_per_byte": 1.2412932957444307, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 454, "native_id": "MCAS_2010_8_12005", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1985626220703125, "logits_per_token_corr": -1.1985626220703125, "logits_per_char_corr": -0.5992813110351562, "bits_per_byte_corr": 0.8645801755284774}, "model_output": [{"sum_logits": -1.1985626220703125, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.1985626220703125, "logits_per_char": -0.5992813110351562, "bits_per_byte": 0.8645801755284774, "num_chars": 2}, {"sum_logits": -1.0997772216796875, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": true, "logits_per_token": -1.0997772216796875, "logits_per_char": -0.5498886108398438, "bits_per_byte": 0.7933215719005337, "num_chars": 2}, {"sum_logits": -1.447347640991211, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.447347640991211, "logits_per_char": -0.7236738204956055, "bits_per_byte": 1.04404063205093, "num_chars": 2}, {"sum_logits": -2.125917434692383, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -2.125917434692383, "logits_per_char": -1.0629587173461914, "bits_per_byte": 1.5335252701861455, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 455, "native_id": "Mercury_7218505", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.9277461767196655, "logits_per_token_corr": -0.9277461767196655, "logits_per_char_corr": -0.46387308835983276, "bits_per_byte_corr": 0.6692274041790595}, "model_output": [{"sum_logits": -1.6801151037216187, "num_tokens": 1, "num_tokens_all": 415, "is_greedy": false, "logits_per_token": -1.6801151037216187, "logits_per_char": -0.8400575518608093, "bits_per_byte": 1.2119468641317834, "num_chars": 2}, {"sum_logits": -0.9277461767196655, "num_tokens": 1, "num_tokens_all": 415, "is_greedy": true, "logits_per_token": -0.9277461767196655, "logits_per_char": -0.46387308835983276, "bits_per_byte": 0.6692274041790595, "num_chars": 2}, {"sum_logits": -1.3985999822616577, "num_tokens": 1, "num_tokens_all": 415, "is_greedy": false, "logits_per_token": -1.3985999822616577, "logits_per_char": -0.6992999911308289, "bits_per_byte": 1.0088766292988678, "num_chars": 2}, {"sum_logits": -1.8216537237167358, "num_tokens": 1, "num_tokens_all": 415, "is_greedy": false, "logits_per_token": -1.8216537237167358, "logits_per_char": -0.9108268618583679, "bits_per_byte": 1.3140453967124683, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 456, "native_id": "Mercury_SC_400853", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9031096696853638, "logits_per_token_corr": -1.9031096696853638, "logits_per_char_corr": -0.9515548348426819, "bits_per_byte_corr": 1.37280344136244}, "model_output": [{"sum_logits": -1.2450381517410278, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.2450381517410278, "logits_per_char": -0.6225190758705139, "bits_per_byte": 0.898105183617816, "num_chars": 2}, {"sum_logits": -1.205678105354309, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.205678105354309, "logits_per_char": -0.6028390526771545, "bits_per_byte": 0.8697129117521563, "num_chars": 2}, {"sum_logits": -1.3773242235183716, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3773242235183716, "logits_per_char": -0.6886621117591858, "bits_per_byte": 0.9935294134838122, "num_chars": 2}, {"sum_logits": -1.9031096696853638, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.9031096696853638, "logits_per_char": -0.9515548348426819, "bits_per_byte": 1.37280344136244, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 457, "native_id": "Mercury_7210455", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.514344334602356, "logits_per_token_corr": -1.514344334602356, "logits_per_char_corr": -0.757172167301178, "bits_per_byte_corr": 1.0923685308653428}, "model_output": [{"sum_logits": -1.2126532793045044, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.2126532793045044, "logits_per_char": -0.6063266396522522, "bits_per_byte": 0.8747444361858022, "num_chars": 2}, {"sum_logits": -0.9974223375320435, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -0.9974223375320435, "logits_per_char": -0.49871116876602173, "bits_per_byte": 0.7194881300151954, "num_chars": 2}, {"sum_logits": -1.514344334602356, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.514344334602356, "logits_per_char": -0.757172167301178, "bits_per_byte": 1.0923685308653428, "num_chars": 2}, {"sum_logits": -2.274683952331543, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -2.274683952331543, "logits_per_char": -1.1373419761657715, "bits_per_byte": 1.640837628810391, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 458, "native_id": "Mercury_7174738", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2910175323486328, "logits_per_token_corr": -1.2910175323486328, "logits_per_char_corr": -0.6455087661743164, "bits_per_byte_corr": 0.9312722958107088}, "model_output": [{"sum_logits": -1.5648155212402344, "num_tokens": 1, "num_tokens_all": 454, "is_greedy": false, "logits_per_token": -1.5648155212402344, "logits_per_char": -0.7824077606201172, "bits_per_byte": 1.1287757962004932, "num_chars": 2}, {"sum_logits": -1.0853424072265625, "num_tokens": 1, "num_tokens_all": 454, "is_greedy": true, "logits_per_token": -1.0853424072265625, "logits_per_char": -0.5426712036132812, "bits_per_byte": 0.7829090542866882, "num_chars": 2}, {"sum_logits": -1.2910175323486328, "num_tokens": 1, "num_tokens_all": 454, "is_greedy": false, "logits_per_token": -1.2910175323486328, "logits_per_char": -0.6455087661743164, "bits_per_byte": 0.9312722958107088, "num_chars": 2}, {"sum_logits": -1.7911415100097656, "num_tokens": 1, "num_tokens_all": 454, "is_greedy": false, "logits_per_token": -1.7911415100097656, "logits_per_char": -0.8955707550048828, "bits_per_byte": 1.2920354870116575, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 459, "native_id": "MCAS_2001_5_2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8853029012680054, "logits_per_token_corr": -1.8853029012680054, "logits_per_char_corr": -0.9426514506340027, "bits_per_byte_corr": 1.3599585731174402}, "model_output": [{"sum_logits": -1.3663419485092163, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3663419485092163, "logits_per_char": -0.6831709742546082, "bits_per_byte": 0.985607376637113, "num_chars": 2}, {"sum_logits": -1.2713464498519897, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.2713464498519897, "logits_per_char": -0.6356732249259949, "bits_per_byte": 0.9170826092272862, "num_chars": 2}, {"sum_logits": -1.2032274007797241, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.2032274007797241, "logits_per_char": -0.6016137003898621, "bits_per_byte": 0.8679451020839362, "num_chars": 2}, {"sum_logits": -1.8853029012680054, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.8853029012680054, "logits_per_char": -0.9426514506340027, "bits_per_byte": 1.3599585731174402, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 460, "native_id": "NYSEDREGENTS_2012_4_9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.119746446609497, "logits_per_token_corr": -1.119746446609497, "logits_per_char_corr": -0.5598732233047485, "bits_per_byte_corr": 0.8077263227888603}, "model_output": [{"sum_logits": -1.119746446609497, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.119746446609497, "logits_per_char": -0.5598732233047485, "bits_per_byte": 0.8077263227888603, "num_chars": 2}, {"sum_logits": -1.3675968647003174, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3675968647003174, "logits_per_char": -0.6837984323501587, "bits_per_byte": 0.9865126073199301, "num_chars": 2}, {"sum_logits": -1.5627930164337158, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.5627930164337158, "logits_per_char": -0.7813965082168579, "bits_per_byte": 1.127316867373223, "num_chars": 2}, {"sum_logits": -1.642371416091919, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.642371416091919, "logits_per_char": -0.8211857080459595, "bits_per_byte": 1.184720548647649, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 461, "native_id": "Mercury_416593", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0943933725357056, "logits_per_token_corr": -1.0943933725357056, "logits_per_char_corr": -0.5471966862678528, "bits_per_byte_corr": 0.7894379456700723}, "model_output": [{"sum_logits": -1.4724260568618774, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4724260568618774, "logits_per_char": -0.7362130284309387, "bits_per_byte": 1.0621308851559237, "num_chars": 2}, {"sum_logits": -1.0943933725357056, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -1.0943933725357056, "logits_per_char": -0.5471966862678528, "bits_per_byte": 0.7894379456700723, "num_chars": 2}, {"sum_logits": -1.5429388284683228, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.5429388284683228, "logits_per_char": -0.7714694142341614, "bits_per_byte": 1.1129950981139378, "num_chars": 2}, {"sum_logits": -1.5924879312515259, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.5924879312515259, "logits_per_char": -0.7962439656257629, "bits_per_byte": 1.1487372205468758, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 462, "native_id": "Mercury_7205870", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.9720384478569031, "logits_per_token_corr": -0.9720384478569031, "logits_per_char_corr": -0.48601922392845154, "bits_per_byte_corr": 0.7011775241387834}, "model_output": [{"sum_logits": -1.5067503452301025, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.5067503452301025, "logits_per_char": -0.7533751726150513, "bits_per_byte": 1.0868906254613822, "num_chars": 2}, {"sum_logits": -0.9720384478569031, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": true, "logits_per_token": -0.9720384478569031, "logits_per_char": -0.48601922392845154, "bits_per_byte": 0.7011775241387834, "num_chars": 2}, {"sum_logits": -1.4247591495513916, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.4247591495513916, "logits_per_char": -0.7123795747756958, "bits_per_byte": 1.0277464797602232, "num_chars": 2}, {"sum_logits": -1.9206793308258057, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.9206793308258057, "logits_per_char": -0.9603396654129028, "bits_per_byte": 1.3854772728611566, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 463, "native_id": "Mercury_SC_401798", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2328524589538574, "logits_per_token_corr": -1.2328524589538574, "logits_per_char_corr": -0.6164262294769287, "bits_per_byte_corr": 0.8893150643408861}, "model_output": [{"sum_logits": -1.3998160362243652, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3998160362243652, "logits_per_char": -0.6999080181121826, "bits_per_byte": 1.009753826809594, "num_chars": 2}, {"sum_logits": -1.2728991508483887, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.2728991508483887, "logits_per_char": -0.6364495754241943, "bits_per_byte": 0.9182026462410311, "num_chars": 2}, {"sum_logits": -1.2328524589538574, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.2328524589538574, "logits_per_char": -0.6164262294769287, "bits_per_byte": 0.8893150643408861, "num_chars": 2}, {"sum_logits": -1.7630095481872559, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.7630095481872559, "logits_per_char": -0.8815047740936279, "bits_per_byte": 1.2717425661057367, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 464, "native_id": "Mercury_7084228", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8530681133270264, "logits_per_token_corr": -1.8530681133270264, "logits_per_char_corr": -0.9265340566635132, "bits_per_byte_corr": 1.3367060887641447}, "model_output": [{"sum_logits": -1.4208476543426514, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4208476543426514, "logits_per_char": -0.7104238271713257, "bits_per_byte": 1.024924932390166, "num_chars": 2}, {"sum_logits": -1.0729682445526123, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.0729682445526123, "logits_per_char": -0.5364841222763062, "bits_per_byte": 0.7739829827242513, "num_chars": 2}, {"sum_logits": -1.410170316696167, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.410170316696167, "logits_per_char": -0.7050851583480835, "bits_per_byte": 1.0172228613539203, "num_chars": 2}, {"sum_logits": -1.8530681133270264, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.8530681133270264, "logits_per_char": -0.9265340566635132, "bits_per_byte": 1.3367060887641447, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 465, "native_id": "Mercury_417460", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1995829343795776, "logits_per_token_corr": -1.1995829343795776, "logits_per_char_corr": -0.5997914671897888, "bits_per_byte_corr": 0.8653161752828454}, "model_output": [{"sum_logits": -1.1995829343795776, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.1995829343795776, "logits_per_char": -0.5997914671897888, "bits_per_byte": 0.8653161752828454, "num_chars": 2}, {"sum_logits": -1.2205504179000854, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.2205504179000854, "logits_per_char": -0.6102752089500427, "bits_per_byte": 0.8804410175303351, "num_chars": 2}, {"sum_logits": -1.329400897026062, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.329400897026062, "logits_per_char": -0.664700448513031, "bits_per_byte": 0.9589600407471085, "num_chars": 2}, {"sum_logits": -2.1026153564453125, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -2.1026153564453125, "logits_per_char": -1.0513076782226562, "bits_per_byte": 1.516716373821406, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 466, "native_id": "Mercury_402539", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0083504915237427, "logits_per_token_corr": -1.0083504915237427, "logits_per_char_corr": -0.5041752457618713, "bits_per_byte_corr": 0.7273711268001487}, "model_output": [{"sum_logits": -1.2982522249221802, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.2982522249221802, "logits_per_char": -0.6491261124610901, "bits_per_byte": 0.936491023359819, "num_chars": 2}, {"sum_logits": -1.0083504915237427, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": true, "logits_per_token": -1.0083504915237427, "logits_per_char": -0.5041752457618713, "bits_per_byte": 0.7273711268001487, "num_chars": 2}, {"sum_logits": -1.5175668001174927, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.5175668001174927, "logits_per_char": -0.7587834000587463, "bits_per_byte": 1.0946930483744062, "num_chars": 2}, {"sum_logits": -2.0398731231689453, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -2.0398731231689453, "logits_per_char": -1.0199365615844727, "bits_per_byte": 1.4714574194203167, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 467, "native_id": "Mercury_406800", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1642422676086426, "logits_per_token_corr": -1.1642422676086426, "logits_per_char_corr": -0.5821211338043213, "bits_per_byte_corr": 0.8398232729367584}, "model_output": [{"sum_logits": -1.2743268013000488, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.2743268013000488, "logits_per_char": -0.6371634006500244, "bits_per_byte": 0.9192324783543984, "num_chars": 2}, {"sum_logits": -1.1642422676086426, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.1642422676086426, "logits_per_char": -0.5821211338043213, "bits_per_byte": 0.8398232729367584, "num_chars": 2}, {"sum_logits": -1.4187531471252441, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4187531471252441, "logits_per_char": -0.7093765735626221, "bits_per_byte": 1.023414064802335, "num_chars": 2}, {"sum_logits": -1.876610279083252, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.876610279083252, "logits_per_char": -0.938305139541626, "bits_per_byte": 1.3536881716583031, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 468, "native_id": "Mercury_SC_408321", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3033429384231567, "logits_per_token_corr": -1.3033429384231567, "logits_per_char_corr": -0.6516714692115784, "bits_per_byte_corr": 0.9401631969210443}, "model_output": [{"sum_logits": -1.3033429384231567, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.3033429384231567, "logits_per_char": -0.6516714692115784, "bits_per_byte": 0.9401631969210443, "num_chars": 2}, {"sum_logits": -1.0536423921585083, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.0536423921585083, "logits_per_char": -0.5268211960792542, "bits_per_byte": 0.7600423270192782, "num_chars": 2}, {"sum_logits": -1.4701653718948364, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4701653718948364, "logits_per_char": -0.7350826859474182, "bits_per_byte": 1.0605001456604415, "num_chars": 2}, {"sum_logits": -1.9792271852493286, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.9792271852493286, "logits_per_char": -0.9896135926246643, "bits_per_byte": 1.4277106224769398, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 469, "native_id": "Mercury_SC_406836", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4775270223617554, "logits_per_token_corr": -1.4775270223617554, "logits_per_char_corr": -0.7387635111808777, "bits_per_byte_corr": 1.0658104539711362}, "model_output": [{"sum_logits": -1.1830209493637085, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.1830209493637085, "logits_per_char": -0.5915104746818542, "bits_per_byte": 0.853369228458001, "num_chars": 2}, {"sum_logits": -1.0681394338607788, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": true, "logits_per_token": -1.0681394338607788, "logits_per_char": -0.5340697169303894, "bits_per_byte": 0.7704997321049988, "num_chars": 2}, {"sum_logits": -1.4775270223617554, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.4775270223617554, "logits_per_char": -0.7387635111808777, "bits_per_byte": 1.0658104539711362, "num_chars": 2}, {"sum_logits": -2.2000327110290527, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -2.2000327110290527, "logits_per_char": -1.1000163555145264, "bits_per_byte": 1.5869881409986983, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 470, "native_id": "Mercury_SC_410963", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.0801496505737305, "logits_per_token_corr": -2.0801496505737305, "logits_per_char_corr": -1.0400748252868652, "bits_per_byte_corr": 1.5005107925958936}, "model_output": [{"sum_logits": -1.319077491760254, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.319077491760254, "logits_per_char": -0.659538745880127, "bits_per_byte": 0.951513277956069, "num_chars": 2}, {"sum_logits": -1.0747594833374023, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.0747594833374023, "logits_per_char": -0.5373797416687012, "bits_per_byte": 0.7752750883801844, "num_chars": 2}, {"sum_logits": -1.360703468322754, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.360703468322754, "logits_per_char": -0.680351734161377, "bits_per_byte": 0.98154007293553, "num_chars": 2}, {"sum_logits": -2.0801496505737305, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -2.0801496505737305, "logits_per_char": -1.0400748252868652, "bits_per_byte": 1.5005107925958936, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 471, "native_id": "Mercury_7132405", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9386868476867676, "logits_per_token_corr": -1.9386868476867676, "logits_per_char_corr": -0.9693434238433838, "bits_per_byte_corr": 1.398466950498183}, "model_output": [{"sum_logits": -1.2099690437316895, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2099690437316895, "logits_per_char": -0.6049845218658447, "bits_per_byte": 0.8728081695110619, "num_chars": 2}, {"sum_logits": -1.2323212623596191, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.2323212623596191, "logits_per_char": -0.6161606311798096, "bits_per_byte": 0.8889318869947636, "num_chars": 2}, {"sum_logits": -1.3647656440734863, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3647656440734863, "logits_per_char": -0.6823828220367432, "bits_per_byte": 0.9844703133409327, "num_chars": 2}, {"sum_logits": -1.9386868476867676, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.9386868476867676, "logits_per_char": -0.9693434238433838, "bits_per_byte": 1.398466950498183, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 472, "native_id": "Mercury_SC_408872", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2985713481903076, "logits_per_token_corr": -1.2985713481903076, "logits_per_char_corr": -0.6492856740951538, "bits_per_byte_corr": 0.936721222137999}, "model_output": [{"sum_logits": -1.5127742290496826, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.5127742290496826, "logits_per_char": -0.7563871145248413, "bits_per_byte": 1.091235939118085, "num_chars": 2}, {"sum_logits": -1.1976745128631592, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.1976745128631592, "logits_per_char": -0.5988372564315796, "bits_per_byte": 0.8639395401540131, "num_chars": 2}, {"sum_logits": -1.2985713481903076, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.2985713481903076, "logits_per_char": -0.6492856740951538, "bits_per_byte": 0.936721222137999, "num_chars": 2}, {"sum_logits": -1.6622130870819092, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.6622130870819092, "logits_per_char": -0.8311065435409546, "bits_per_byte": 1.199033288817764, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 473, "native_id": "VASoL_2008_3_25", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2125470638275146, "logits_per_token_corr": -1.2125470638275146, "logits_per_char_corr": -0.6062735319137573, "bits_per_byte_corr": 0.8746678179148428}, "model_output": [{"sum_logits": -1.505979299545288, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.505979299545288, "logits_per_char": -0.752989649772644, "bits_per_byte": 1.0863344335684915, "num_chars": 2}, {"sum_logits": -1.2125470638275146, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.2125470638275146, "logits_per_char": -0.6062735319137573, "bits_per_byte": 0.8746678179148428, "num_chars": 2}, {"sum_logits": -1.2729852199554443, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.2729852199554443, "logits_per_char": -0.6364926099777222, "bits_per_byte": 0.9182647319779926, "num_chars": 2}, {"sum_logits": -1.728168249130249, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.728168249130249, "logits_per_char": -0.8640841245651245, "bits_per_byte": 1.246609881421882, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 474, "native_id": "WASL_2005_8_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.536287784576416, "logits_per_token_corr": -1.536287784576416, "logits_per_char_corr": -0.768143892288208, "bits_per_byte_corr": 1.10819738409414}, "model_output": [{"sum_logits": -1.536287784576416, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": false, "logits_per_token": -1.536287784576416, "logits_per_char": -0.768143892288208, "bits_per_byte": 1.10819738409414, "num_chars": 2}, {"sum_logits": -0.959923267364502, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": true, "logits_per_token": -0.959923267364502, "logits_per_char": -0.479961633682251, "bits_per_byte": 0.6924382687308462, "num_chars": 2}, {"sum_logits": -1.317680835723877, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": false, "logits_per_token": -1.317680835723877, "logits_per_char": -0.6588404178619385, "bits_per_byte": 0.950505803587314, "num_chars": 2}, {"sum_logits": -2.0639634132385254, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": false, "logits_per_token": -2.0639634132385254, "logits_per_char": -1.0319817066192627, "bits_per_byte": 1.4888348904288091, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 475, "native_id": "AKDE&ED_2012_8_20", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.345056176185608, "logits_per_token_corr": -1.345056176185608, "logits_per_char_corr": -0.672528088092804, "bits_per_byte_corr": 0.9702529375507213}, "model_output": [{"sum_logits": -1.62529718875885, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.62529718875885, "logits_per_char": -0.812648594379425, "bits_per_byte": 1.1724040970974257, "num_chars": 2}, {"sum_logits": -1.345056176185608, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.345056176185608, "logits_per_char": -0.672528088092804, "bits_per_byte": 0.9702529375507213, "num_chars": 2}, {"sum_logits": -1.2197223901748657, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2197223901748657, "logits_per_char": -0.6098611950874329, "bits_per_byte": 0.8798437217838881, "num_chars": 2}, {"sum_logits": -1.4400955438613892, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4400955438613892, "logits_per_char": -0.7200477719306946, "bits_per_byte": 1.0388093497683066, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 476, "native_id": "Mercury_7056823", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3007646799087524, "logits_per_token_corr": -1.3007646799087524, "logits_per_char_corr": -0.6503823399543762, "bits_per_byte_corr": 0.9383033765346126}, "model_output": [{"sum_logits": -1.1965147256851196, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.1965147256851196, "logits_per_char": -0.5982573628425598, "bits_per_byte": 0.8631029305488903, "num_chars": 2}, {"sum_logits": -1.3007646799087524, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3007646799087524, "logits_per_char": -0.6503823399543762, "bits_per_byte": 0.9383033765346126, "num_chars": 2}, {"sum_logits": -1.432808518409729, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.432808518409729, "logits_per_char": -0.7164042592048645, "bits_per_byte": 1.0335528720273321, "num_chars": 2}, {"sum_logits": -1.7876116037368774, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.7876116037368774, "logits_per_char": -0.8938058018684387, "bits_per_byte": 1.2894891978743064, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 477, "native_id": "Mercury_7205800", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.7777270078659058, "logits_per_token_corr": -0.7777270078659058, "logits_per_char_corr": -0.3888635039329529, "bits_per_byte_corr": 0.5610114487071801}, "model_output": [{"sum_logits": -1.4596270322799683, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.4596270322799683, "logits_per_char": -0.7298135161399841, "bits_per_byte": 1.052898340509649, "num_chars": 2}, {"sum_logits": -0.7777270078659058, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -0.7777270078659058, "logits_per_char": -0.3888635039329529, "bits_per_byte": 0.5610114487071801, "num_chars": 2}, {"sum_logits": -1.5821837186813354, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.5821837186813354, "logits_per_char": -0.7910918593406677, "bits_per_byte": 1.1413043023592306, "num_chars": 2}, {"sum_logits": -2.362499237060547, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -2.362499237060547, "logits_per_char": -1.1812496185302734, "bits_per_byte": 1.7041829667068296, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 478, "native_id": "Mercury_SC_402282", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.448258876800537, "logits_per_token_corr": -1.448258876800537, "logits_per_char_corr": -0.7241294384002686, "bits_per_byte_corr": 1.0446979497425282}, "model_output": [{"sum_logits": -1.448258876800537, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.448258876800537, "logits_per_char": -0.7241294384002686, "bits_per_byte": 1.0446979497425282, "num_chars": 2}, {"sum_logits": -1.4387526512145996, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4387526512145996, "logits_per_char": -0.7193763256072998, "bits_per_byte": 1.0378406574873213, "num_chars": 2}, {"sum_logits": -1.3490958213806152, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.3490958213806152, "logits_per_char": -0.6745479106903076, "bits_per_byte": 0.9731669255956173, "num_chars": 2}, {"sum_logits": -1.3983149528503418, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3983149528503418, "logits_per_char": -0.6991574764251709, "bits_per_byte": 1.0086710240397612, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 479, "native_id": "MCAS_1998_8_26", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4626328945159912, "logits_per_token_corr": -1.4626328945159912, "logits_per_char_corr": -0.7313164472579956, "bits_per_byte_corr": 1.0550666117804035}, "model_output": [{"sum_logits": -1.1133573055267334, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.1133573055267334, "logits_per_char": -0.5566786527633667, "bits_per_byte": 0.8031175317110354, "num_chars": 2}, {"sum_logits": -1.1585423946380615, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.1585423946380615, "logits_per_char": -0.5792711973190308, "bits_per_byte": 0.8357116837025783, "num_chars": 2}, {"sum_logits": -1.4626328945159912, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4626328945159912, "logits_per_char": -0.7313164472579956, "bits_per_byte": 1.0550666117804035, "num_chars": 2}, {"sum_logits": -2.173043966293335, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -2.173043966293335, "logits_per_char": -1.0865219831466675, "bits_per_byte": 1.5675198769036651, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 480, "native_id": "Mercury_7230318", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.592437982559204, "logits_per_token_corr": -1.592437982559204, "logits_per_char_corr": -0.796218991279602, "bits_per_byte_corr": 1.1487011901815198}, "model_output": [{"sum_logits": -1.3829119205474854, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3829119205474854, "logits_per_char": -0.6914559602737427, "bits_per_byte": 0.9975600848807613, "num_chars": 2}, {"sum_logits": -1.2111341953277588, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.2111341953277588, "logits_per_char": -0.6055670976638794, "bits_per_byte": 0.873648648725829, "num_chars": 2}, {"sum_logits": -1.4508898258209229, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4508898258209229, "logits_per_char": -0.7254449129104614, "bits_per_byte": 1.0465957782948005, "num_chars": 2}, {"sum_logits": -1.592437982559204, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.592437982559204, "logits_per_char": -0.796218991279602, "bits_per_byte": 1.1487011901815198, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 481, "native_id": "Mercury_SC_416167", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3536096811294556, "logits_per_token_corr": -1.3536096811294556, "logits_per_char_corr": -0.6768048405647278, "bits_per_byte_corr": 0.9764229871330798}, "model_output": [{"sum_logits": -1.4987112283706665, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4987112283706665, "logits_per_char": -0.7493556141853333, "bits_per_byte": 1.0810916284482603, "num_chars": 2}, {"sum_logits": -1.2130762338638306, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.2130762338638306, "logits_per_char": -0.6065381169319153, "bits_per_byte": 0.8750495334084331, "num_chars": 2}, {"sum_logits": -1.3536096811294556, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3536096811294556, "logits_per_char": -0.6768048405647278, "bits_per_byte": 0.9764229871330798, "num_chars": 2}, {"sum_logits": -1.577074646949768, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.577074646949768, "logits_per_char": -0.788537323474884, "bits_per_byte": 1.137618886133889, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 482, "native_id": "Mercury_7027720", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2590925693511963, "logits_per_token_corr": -1.2590925693511963, "logits_per_char_corr": -0.6295462846755981, "bits_per_byte_corr": 0.9082433029122096}, "model_output": [{"sum_logits": -1.2381632328033447, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": true, "logits_per_token": -1.2381632328033447, "logits_per_char": -0.6190816164016724, "bits_per_byte": 0.893145977888858, "num_chars": 2}, {"sum_logits": -1.2590925693511963, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.2590925693511963, "logits_per_char": -0.6295462846755981, "bits_per_byte": 0.9082433029122096, "num_chars": 2}, {"sum_logits": -1.3950445652008057, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.3950445652008057, "logits_per_char": -0.6975222826004028, "bits_per_byte": 1.0063119380178742, "num_chars": 2}, {"sum_logits": -1.8160955905914307, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.8160955905914307, "logits_per_char": -0.9080477952957153, "bits_per_byte": 1.3100360511642264, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 483, "native_id": "LEAP__5_10312", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1338578462600708, "logits_per_token_corr": -1.1338578462600708, "logits_per_char_corr": -0.5669289231300354, "bits_per_byte_corr": 0.81790554593681}, "model_output": [{"sum_logits": -1.516109585762024, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.516109585762024, "logits_per_char": -0.758054792881012, "bits_per_byte": 1.093641890412332, "num_chars": 2}, {"sum_logits": -1.1338578462600708, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": true, "logits_per_token": -1.1338578462600708, "logits_per_char": -0.5669289231300354, "bits_per_byte": 0.81790554593681, "num_chars": 2}, {"sum_logits": -1.2653313875198364, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.2653313875198364, "logits_per_char": -0.6326656937599182, "bits_per_byte": 0.9127436589286654, "num_chars": 2}, {"sum_logits": -1.821884274482727, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.821884274482727, "logits_per_char": -0.9109421372413635, "bits_per_byte": 1.3142117039358527, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 484, "native_id": "Mercury_405161", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9142239093780518, "logits_per_token_corr": -1.9142239093780518, "logits_per_char_corr": -0.9571119546890259, "bits_per_byte_corr": 1.380820670606392}, "model_output": [{"sum_logits": -1.496649980545044, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.496649980545044, "logits_per_char": -0.748324990272522, "bits_per_byte": 1.0796047524402248, "num_chars": 2}, {"sum_logits": -1.0742180347442627, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.0742180347442627, "logits_per_char": -0.5371090173721313, "bits_per_byte": 0.7748845157800747, "num_chars": 2}, {"sum_logits": -1.3292343616485596, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3292343616485596, "logits_per_char": -0.6646171808242798, "bits_per_byte": 0.9588399108654808, "num_chars": 2}, {"sum_logits": -1.9142239093780518, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.9142239093780518, "logits_per_char": -0.9571119546890259, "bits_per_byte": 1.380820670606392, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 485, "native_id": "Mercury_SC_409245", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8257211446762085, "logits_per_token_corr": -1.8257211446762085, "logits_per_char_corr": -0.9128605723381042, "bits_per_byte_corr": 1.31697942073619}, "model_output": [{"sum_logits": -1.477479338645935, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.477479338645935, "logits_per_char": -0.7387396693229675, "bits_per_byte": 1.0657760574409636, "num_chars": 2}, {"sum_logits": -1.105878233909607, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.105878233909607, "logits_per_char": -0.5529391169548035, "bits_per_byte": 0.7977225219447907, "num_chars": 2}, {"sum_logits": -1.3148225545883179, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3148225545883179, "logits_per_char": -0.6574112772941589, "bits_per_byte": 0.9484439895774437, "num_chars": 2}, {"sum_logits": -1.8257211446762085, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.8257211446762085, "logits_per_char": -0.9128605723381042, "bits_per_byte": 1.31697942073619, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 486, "native_id": "ACTAAP_2011_5_8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4293206930160522, "logits_per_token_corr": -1.4293206930160522, "logits_per_char_corr": -0.7146603465080261, "bits_per_byte_corr": 1.0310369378278583}, "model_output": [{"sum_logits": -1.4307111501693726, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4307111501693726, "logits_per_char": -0.7153555750846863, "bits_per_byte": 1.0320399406476908, "num_chars": 2}, {"sum_logits": -1.2224820852279663, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.2224820852279663, "logits_per_char": -0.6112410426139832, "bits_per_byte": 0.8818344209676265, "num_chars": 2}, {"sum_logits": -1.4293206930160522, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4293206930160522, "logits_per_char": -0.7146603465080261, "bits_per_byte": 1.0310369378278583, "num_chars": 2}, {"sum_logits": -1.5289758443832397, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.5289758443832397, "logits_per_char": -0.7644879221916199, "bits_per_byte": 1.1029229341661502, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 487, "native_id": "Mercury_7223370", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8582361936569214, "logits_per_token_corr": -1.8582361936569214, "logits_per_char_corr": -0.9291180968284607, "bits_per_byte_corr": 1.340434070695575}, "model_output": [{"sum_logits": -1.1452730894088745, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.1452730894088745, "logits_per_char": -0.5726365447044373, "bits_per_byte": 0.8261399032774763, "num_chars": 2}, {"sum_logits": -1.1455248594284058, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.1455248594284058, "logits_per_char": -0.5727624297142029, "bits_per_byte": 0.8263215169567876, "num_chars": 2}, {"sum_logits": -1.6847857236862183, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.6847857236862183, "logits_per_char": -0.8423928618431091, "bits_per_byte": 1.2153160042621884, "num_chars": 2}, {"sum_logits": -1.8582361936569214, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.8582361936569214, "logits_per_char": -0.9291180968284607, "bits_per_byte": 1.340434070695575, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 488, "native_id": "Mercury_SC_400697", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2454200983047485, "logits_per_token_corr": -1.2454200983047485, "logits_per_char_corr": -0.6227100491523743, "bits_per_byte_corr": 0.8983806998244984}, "model_output": [{"sum_logits": -1.4622875452041626, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4622875452041626, "logits_per_char": -0.7311437726020813, "bits_per_byte": 1.0548174949106284, "num_chars": 2}, {"sum_logits": -1.2454200983047485, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.2454200983047485, "logits_per_char": -0.6227100491523743, "bits_per_byte": 0.8983806998244984, "num_chars": 2}, {"sum_logits": -1.168185830116272, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.168185830116272, "logits_per_char": -0.584092915058136, "bits_per_byte": 0.8426679519733568, "num_chars": 2}, {"sum_logits": -1.839358925819397, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.839358925819397, "logits_per_char": -0.9196794629096985, "bits_per_byte": 1.3268170003482005, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 489, "native_id": "Mercury_SC_401262", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7288511991500854, "logits_per_token_corr": -1.7288511991500854, "logits_per_char_corr": -0.8644255995750427, "bits_per_byte_corr": 1.247102525725279}, "model_output": [{"sum_logits": -1.4342974424362183, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4342974424362183, "logits_per_char": -0.7171487212181091, "bits_per_byte": 1.034626903681971, "num_chars": 2}, {"sum_logits": -1.1778258085250854, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.1778258085250854, "logits_per_char": -0.5889129042625427, "bits_per_byte": 0.8496217264956977, "num_chars": 2}, {"sum_logits": -1.3319281339645386, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3319281339645386, "logits_per_char": -0.6659640669822693, "bits_per_byte": 0.9607830568462556, "num_chars": 2}, {"sum_logits": -1.7288511991500854, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.7288511991500854, "logits_per_char": -0.8644255995750427, "bits_per_byte": 1.247102525725279, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 490, "native_id": "Mercury_7136063", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1518139839172363, "logits_per_token_corr": -1.1518139839172363, "logits_per_char_corr": -0.5759069919586182, "bits_per_byte_corr": 0.8308581613125755}, "model_output": [{"sum_logits": -1.679304599761963, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.679304599761963, "logits_per_char": -0.8396522998809814, "bits_per_byte": 1.211362209110175, "num_chars": 2}, {"sum_logits": -1.1518139839172363, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.1518139839172363, "logits_per_char": -0.5759069919586182, "bits_per_byte": 0.8308581613125755, "num_chars": 2}, {"sum_logits": -1.2434792518615723, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.2434792518615723, "logits_per_char": -0.6217396259307861, "bits_per_byte": 0.8969806750551488, "num_chars": 2}, {"sum_logits": -1.6592488288879395, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.6592488288879395, "logits_per_char": -0.8296244144439697, "bits_per_byte": 1.1968950285195852, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 491, "native_id": "Mercury_405876", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.029827833175659, "logits_per_token_corr": -2.029827833175659, "logits_per_char_corr": -1.0149139165878296, "bits_per_byte_corr": 1.4642112743915088}, "model_output": [{"sum_logits": -1.1810595989227295, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.1810595989227295, "logits_per_char": -0.5905297994613647, "bits_per_byte": 0.851954413180677, "num_chars": 2}, {"sum_logits": -1.0956847667694092, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": true, "logits_per_token": -1.0956847667694092, "logits_per_char": -0.5478423833847046, "bits_per_byte": 0.7903694896984714, "num_chars": 2}, {"sum_logits": -1.560673475265503, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.560673475265503, "logits_per_char": -0.7803367376327515, "bits_per_byte": 1.1257879416070515, "num_chars": 2}, {"sum_logits": -2.029827833175659, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -2.029827833175659, "logits_per_char": -1.0149139165878296, "bits_per_byte": 1.4642112743915088, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 492, "native_id": "Mercury_7057890", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1307672262191772, "logits_per_token_corr": -1.1307672262191772, "logits_per_char_corr": -0.5653836131095886, "bits_per_byte_corr": 0.8156761348336738}, "model_output": [{"sum_logits": -1.1307672262191772, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.1307672262191772, "logits_per_char": -0.5653836131095886, "bits_per_byte": 0.8156761348336738, "num_chars": 2}, {"sum_logits": -1.1427949666976929, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.1427949666976929, "logits_per_char": -0.5713974833488464, "bits_per_byte": 0.824352315604407, "num_chars": 2}, {"sum_logits": -1.5289682149887085, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.5289682149887085, "logits_per_char": -0.7644841074943542, "bits_per_byte": 1.1029174307213225, "num_chars": 2}, {"sum_logits": -2.0538554191589355, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -2.0538554191589355, "logits_per_char": -1.0269277095794678, "bits_per_byte": 1.4815435139628241, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 493, "native_id": "LEAP_2002_4_10247", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.055853247642517, "logits_per_token_corr": -1.055853247642517, "logits_per_char_corr": -0.5279266238212585, "bits_per_byte_corr": 0.7616371221407302}, "model_output": [{"sum_logits": -1.3216785192489624, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3216785192489624, "logits_per_char": -0.6608392596244812, "bits_per_byte": 0.9533895226856581, "num_chars": 2}, {"sum_logits": -1.055853247642517, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.055853247642517, "logits_per_char": -0.5279266238212585, "bits_per_byte": 0.7616371221407302, "num_chars": 2}, {"sum_logits": -1.4577792882919312, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4577792882919312, "logits_per_char": -0.7288896441459656, "bits_per_byte": 1.0515654749654613, "num_chars": 2}, {"sum_logits": -2.0208306312561035, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -2.0208306312561035, "logits_per_char": -1.0104153156280518, "bits_per_byte": 1.4577211650958943, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 494, "native_id": "Mercury_SC_405481", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5710010528564453, "logits_per_token_corr": -1.5710010528564453, "logits_per_char_corr": -0.7855005264282227, "bits_per_byte_corr": 1.1332377140944812}, "model_output": [{"sum_logits": -1.1357421875, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.1357421875, "logits_per_char": -0.56787109375, "bits_per_byte": 0.8192648108179053, "num_chars": 2}, {"sum_logits": -1.0468616485595703, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.0468616485595703, "logits_per_char": -0.5234308242797852, "bits_per_byte": 0.755151054437411, "num_chars": 2}, {"sum_logits": -1.5710010528564453, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.5710010528564453, "logits_per_char": -0.7855005264282227, "bits_per_byte": 1.1332377140944812, "num_chars": 2}, {"sum_logits": -2.1905345916748047, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -2.1905345916748047, "logits_per_char": -1.0952672958374023, "bits_per_byte": 1.5801366961536207, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 495, "native_id": "Mercury_SC_400401", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.9807412624359131, "logits_per_token_corr": -0.9807412624359131, "logits_per_char_corr": -0.49037063121795654, "bits_per_byte_corr": 0.7074552778562448}, "model_output": [{"sum_logits": -1.5142781734466553, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.5142781734466553, "logits_per_char": -0.7571390867233276, "bits_per_byte": 1.0923208056797284, "num_chars": 2}, {"sum_logits": -0.9807412624359131, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -0.9807412624359131, "logits_per_char": -0.49037063121795654, "bits_per_byte": 0.7074552778562448, "num_chars": 2}, {"sum_logits": -1.445218801498413, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.445218801498413, "logits_per_char": -0.7226094007492065, "bits_per_byte": 1.0425049989613748, "num_chars": 2}, {"sum_logits": -1.851560354232788, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.851560354232788, "logits_per_char": -0.925780177116394, "bits_per_byte": 1.3356184704800875, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 496, "native_id": "Mercury_7064260", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0580435991287231, "logits_per_token_corr": -1.0580435991287231, "logits_per_char_corr": -0.5290217995643616, "bits_per_byte_corr": 0.7632171267542079}, "model_output": [{"sum_logits": -1.3531485795974731, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3531485795974731, "logits_per_char": -0.6765742897987366, "bits_per_byte": 0.9760903726863109, "num_chars": 2}, {"sum_logits": -1.0580435991287231, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.0580435991287231, "logits_per_char": -0.5290217995643616, "bits_per_byte": 0.7632171267542079, "num_chars": 2}, {"sum_logits": -1.304556965827942, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.304556965827942, "logits_per_char": -0.652278482913971, "bits_per_byte": 0.9410389325792384, "num_chars": 2}, {"sum_logits": -2.206740379333496, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -2.206740379333496, "logits_per_char": -1.103370189666748, "bits_per_byte": 1.591826700898076, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 497, "native_id": "Mercury_7015995", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.262053370475769, "logits_per_token_corr": -1.262053370475769, "logits_per_char_corr": -0.6310266852378845, "bits_per_byte_corr": 0.9103790694619509}, "model_output": [{"sum_logits": -1.3594540357589722, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3594540357589722, "logits_per_char": -0.6797270178794861, "bits_per_byte": 0.9806387978536829, "num_chars": 2}, {"sum_logits": -1.262053370475769, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.262053370475769, "logits_per_char": -0.6310266852378845, "bits_per_byte": 0.9103790694619509, "num_chars": 2}, {"sum_logits": -1.4751595258712769, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4751595258712769, "logits_per_char": -0.7375797629356384, "bits_per_byte": 1.0641026662480673, "num_chars": 2}, {"sum_logits": -1.5709885358810425, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.5709885358810425, "logits_per_char": -0.7854942679405212, "bits_per_byte": 1.1332286850053108, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 498, "native_id": "Mercury_400887", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.43687105178833, "logits_per_token_corr": -1.43687105178833, "logits_per_char_corr": -0.718435525894165, "bits_per_byte_corr": 1.036483370406711}, "model_output": [{"sum_logits": -1.5257763862609863, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.5257763862609863, "logits_per_char": -0.7628881931304932, "bits_per_byte": 1.1006150129828949, "num_chars": 2}, {"sum_logits": -1.43687105178833, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.43687105178833, "logits_per_char": -0.718435525894165, "bits_per_byte": 1.036483370406711, "num_chars": 2}, {"sum_logits": -1.2228169441223145, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": true, "logits_per_token": -1.2228169441223145, "logits_per_char": -0.6114084720611572, "bits_per_byte": 0.8820759706007636, "num_chars": 2}, {"sum_logits": -1.5407509803771973, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.5407509803771973, "logits_per_char": -0.7703754901885986, "bits_per_byte": 1.111416899318294, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 499, "native_id": "Mercury_7247678", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6473720073699951, "logits_per_token_corr": -1.6473720073699951, "logits_per_char_corr": -0.8236860036849976, "bits_per_byte_corr": 1.1883277127668481}, "model_output": [{"sum_logits": -1.6473720073699951, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.6473720073699951, "logits_per_char": -0.8236860036849976, "bits_per_byte": 1.1883277127668481, "num_chars": 2}, {"sum_logits": -0.8995368480682373, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -0.8995368480682373, "logits_per_char": -0.44976842403411865, "bits_per_byte": 0.6488786749029336, "num_chars": 2}, {"sum_logits": -1.426375150680542, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.426375150680542, "logits_per_char": -0.713187575340271, "bits_per_byte": 1.0289121781677721, "num_chars": 2}, {"sum_logits": -1.8839442729949951, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.8839442729949951, "logits_per_char": -0.9419721364974976, "bits_per_byte": 1.3589785299814978, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 500, "native_id": "MDSA_2007_8_24", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.39865243434906, "logits_per_token_corr": -1.39865243434906, "logits_per_char_corr": -0.69932621717453, "bits_per_byte_corr": 1.0089144654820577}, "model_output": [{"sum_logits": -1.2683004140853882, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.2683004140853882, "logits_per_char": -0.6341502070426941, "bits_per_byte": 0.9148853588798614, "num_chars": 2}, {"sum_logits": -1.1393331289291382, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": true, "logits_per_token": -1.1393331289291382, "logits_per_char": -0.5696665644645691, "bits_per_byte": 0.8218551275138772, "num_chars": 2}, {"sum_logits": -1.39865243434906, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.39865243434906, "logits_per_char": -0.69932621717453, "bits_per_byte": 1.0089144654820577, "num_chars": 2}, {"sum_logits": -2.0043482780456543, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -2.0043482780456543, "logits_per_char": -1.0021741390228271, "bits_per_byte": 1.445831660476438, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 501, "native_id": "AKDE&ED_2008_8_48", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3523850440979004, "logits_per_token_corr": -1.3523850440979004, "logits_per_char_corr": -0.6761925220489502, "bits_per_byte_corr": 0.9755395982469224}, "model_output": [{"sum_logits": -1.3523850440979004, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3523850440979004, "logits_per_char": -0.6761925220489502, "bits_per_byte": 0.9755395982469224, "num_chars": 2}, {"sum_logits": -1.153618335723877, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.153618335723877, "logits_per_char": -0.5768091678619385, "bits_per_byte": 0.8321597260143061, "num_chars": 2}, {"sum_logits": -1.37571382522583, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.37571382522583, "logits_per_char": -0.687856912612915, "bits_per_byte": 0.9923677566685586, "num_chars": 2}, {"sum_logits": -1.8190274238586426, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.8190274238586426, "logits_per_char": -0.9095137119293213, "bits_per_byte": 1.3121509218218879, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 502, "native_id": "Mercury_401014", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3598551750183105, "logits_per_token_corr": -1.3598551750183105, "logits_per_char_corr": -0.6799275875091553, "bits_per_byte_corr": 0.9809281586637597}, "model_output": [{"sum_logits": -1.4900908470153809, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.4900908470153809, "logits_per_char": -0.7450454235076904, "bits_per_byte": 1.0748733377323347, "num_chars": 2}, {"sum_logits": -1.3173308372497559, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": true, "logits_per_token": -1.3173308372497559, "logits_per_char": -0.6586654186248779, "bits_per_byte": 0.9502533330558471, "num_chars": 2}, {"sum_logits": -1.3598551750183105, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.3598551750183105, "logits_per_char": -0.6799275875091553, "bits_per_byte": 0.9809281586637597, "num_chars": 2}, {"sum_logits": -1.5254817008972168, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.5254817008972168, "logits_per_char": -0.7627408504486084, "bits_per_byte": 1.1004024424264285, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 503, "native_id": "Mercury_7106698", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.487642765045166, "logits_per_token_corr": -1.487642765045166, "logits_per_char_corr": -0.743821382522583, "bits_per_byte_corr": 1.0731074198732742}, "model_output": [{"sum_logits": -1.487642765045166, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.487642765045166, "logits_per_char": -0.743821382522583, "bits_per_byte": 1.0731074198732742, "num_chars": 2}, {"sum_logits": -1.1723294258117676, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.1723294258117676, "logits_per_char": -0.5861647129058838, "bits_per_byte": 0.8456569244540291, "num_chars": 2}, {"sum_logits": -1.3427987098693848, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3427987098693848, "logits_per_char": -0.6713993549346924, "bits_per_byte": 0.9686245198210256, "num_chars": 2}, {"sum_logits": -1.6607403755187988, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.6607403755187988, "logits_per_char": -0.8303701877593994, "bits_per_byte": 1.1979709519833837, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 504, "native_id": "Mercury_7143308", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1981180906295776, "logits_per_token_corr": -1.1981180906295776, "logits_per_char_corr": -0.5990590453147888, "bits_per_byte_corr": 0.8642595138759436}, "model_output": [{"sum_logits": -1.4879206418991089, "num_tokens": 1, "num_tokens_all": 422, "is_greedy": false, "logits_per_token": -1.4879206418991089, "logits_per_char": -0.7439603209495544, "bits_per_byte": 1.073307865652855, "num_chars": 2}, {"sum_logits": -1.1981180906295776, "num_tokens": 1, "num_tokens_all": 422, "is_greedy": true, "logits_per_token": -1.1981180906295776, "logits_per_char": -0.5990590453147888, "bits_per_byte": 0.8642595138759436, "num_chars": 2}, {"sum_logits": -1.2554396390914917, "num_tokens": 1, "num_tokens_all": 422, "is_greedy": false, "logits_per_token": -1.2554396390914917, "logits_per_char": -0.6277198195457458, "bits_per_byte": 0.9056082707270131, "num_chars": 2}, {"sum_logits": -1.801742434501648, "num_tokens": 1, "num_tokens_all": 422, "is_greedy": false, "logits_per_token": -1.801742434501648, "logits_per_char": -0.900871217250824, "bits_per_byte": 1.2996824376083014, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 505, "native_id": "MCAS_2005_9_21", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2622569799423218, "logits_per_token_corr": -1.2622569799423218, "logits_per_char_corr": -0.6311284899711609, "bits_per_byte_corr": 0.9105259426457878}, "model_output": [{"sum_logits": -1.8943179845809937, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.8943179845809937, "logits_per_char": -0.9471589922904968, "bits_per_byte": 1.3664615811118694, "num_chars": 2}, {"sum_logits": -1.2622569799423218, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.2622569799423218, "logits_per_char": -0.6311284899711609, "bits_per_byte": 0.9105259426457878, "num_chars": 2}, {"sum_logits": -1.545324683189392, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.545324683189392, "logits_per_char": -0.772662341594696, "bits_per_byte": 1.1147161285011231, "num_chars": 2}, {"sum_logits": -1.1737903356552124, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.1737903356552124, "logits_per_char": -0.5868951678276062, "bits_per_byte": 0.8467107481471917, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 506, "native_id": "Mercury_400443", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3742804527282715, "logits_per_token_corr": -1.3742804527282715, "logits_per_char_corr": -0.6871402263641357, "bits_per_byte_corr": 0.9913337969715706}, "model_output": [{"sum_logits": -1.6949477195739746, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.6949477195739746, "logits_per_char": -0.8474738597869873, "bits_per_byte": 1.2226463347985936, "num_chars": 2}, {"sum_logits": -1.3742804527282715, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.3742804527282715, "logits_per_char": -0.6871402263641357, "bits_per_byte": 0.9913337969715706, "num_chars": 2}, {"sum_logits": -1.247546672821045, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.247546672821045, "logits_per_char": -0.6237733364105225, "bits_per_byte": 0.8999146990788703, "num_chars": 2}, {"sum_logits": -1.421027660369873, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.421027660369873, "logits_per_char": -0.7105138301849365, "bits_per_byte": 1.0250547792915674, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 507, "native_id": "Mercury_7283430", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0797382593154907, "logits_per_token_corr": -1.0797382593154907, "logits_per_char_corr": -0.5398691296577454, "bits_per_byte_corr": 0.7788665160868296}, "model_output": [{"sum_logits": -1.0413166284561157, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.0413166284561157, "logits_per_char": -0.5206583142280579, "bits_per_byte": 0.7511511679349664, "num_chars": 2}, {"sum_logits": -1.0797382593154907, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.0797382593154907, "logits_per_char": -0.5398691296577454, "bits_per_byte": 0.7788665160868296, "num_chars": 2}, {"sum_logits": -1.5433918237686157, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.5433918237686157, "logits_per_char": -0.7716959118843079, "bits_per_byte": 1.1133218651505772, "num_chars": 2}, {"sum_logits": -2.534409999847412, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -2.534409999847412, "logits_per_char": -1.267204999923706, "bits_per_byte": 1.8281903691809434, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 508, "native_id": "Mercury_7159250", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.410266399383545, "logits_per_token_corr": -1.410266399383545, "logits_per_char_corr": -0.7051331996917725, "bits_per_byte_corr": 1.0172921703622182}, "model_output": [{"sum_logits": -1.838996410369873, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.838996410369873, "logits_per_char": -0.9194982051849365, "bits_per_byte": 1.3265555007275636, "num_chars": 2}, {"sum_logits": -1.0081725120544434, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.0081725120544434, "logits_per_char": -0.5040862560272217, "bits_per_byte": 0.7272427417512796, "num_chars": 2}, {"sum_logits": -1.410266399383545, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.410266399383545, "logits_per_char": -0.7051331996917725, "bits_per_byte": 1.0172921703622182, "num_chars": 2}, {"sum_logits": -1.54933500289917, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.54933500289917, "logits_per_char": -0.774667501449585, "bits_per_byte": 1.117608962679963, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 509, "native_id": "Mercury_401912", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4939619302749634, "logits_per_token_corr": -1.4939619302749634, "logits_per_char_corr": -0.7469809651374817, "bits_per_byte_corr": 1.0776657340430706}, "model_output": [{"sum_logits": -1.4939619302749634, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4939619302749634, "logits_per_char": -0.7469809651374817, "bits_per_byte": 1.0776657340430706, "num_chars": 2}, {"sum_logits": -1.123116135597229, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.123116135597229, "logits_per_char": -0.5615580677986145, "bits_per_byte": 0.8101570395848314, "num_chars": 2}, {"sum_logits": -1.6644903421401978, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.6644903421401978, "logits_per_char": -0.8322451710700989, "bits_per_byte": 1.2006759811074812, "num_chars": 2}, {"sum_logits": -1.8948427438735962, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.8948427438735962, "logits_per_char": -0.9474213719367981, "bits_per_byte": 1.3668401149264187, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 510, "native_id": "Mercury_7219328", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.306302785873413, "logits_per_token_corr": -1.306302785873413, "logits_per_char_corr": -0.6531513929367065, "bits_per_byte_corr": 0.9422982755401822}, "model_output": [{"sum_logits": -1.7124364376068115, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.7124364376068115, "logits_per_char": -0.8562182188034058, "bits_per_byte": 1.2352617781873423, "num_chars": 2}, {"sum_logits": -1.306302785873413, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.306302785873413, "logits_per_char": -0.6531513929367065, "bits_per_byte": 0.9422982755401822, "num_chars": 2}, {"sum_logits": -1.4029366970062256, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4029366970062256, "logits_per_char": -0.7014683485031128, "bits_per_byte": 1.012004907726739, "num_chars": 2}, {"sum_logits": -1.2453839778900146, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.2453839778900146, "logits_per_char": -0.6226919889450073, "bits_per_byte": 0.8983546444528927, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 511, "native_id": "Mercury_7214498", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3480918407440186, "logits_per_token_corr": -1.3480918407440186, "logits_per_char_corr": -0.6740459203720093, "bits_per_byte_corr": 0.9724427066528335}, "model_output": [{"sum_logits": -1.5011718273162842, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5011718273162842, "logits_per_char": -0.7505859136581421, "bits_per_byte": 1.0828665753964912, "num_chars": 2}, {"sum_logits": -1.2909553050994873, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2909553050994873, "logits_per_char": -0.6454776525497437, "bits_per_byte": 0.9312274083388336, "num_chars": 2}, {"sum_logits": -1.3480918407440186, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3480918407440186, "logits_per_char": -0.6740459203720093, "bits_per_byte": 0.9724427066528335, "num_chars": 2}, {"sum_logits": -1.5835483074188232, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5835483074188232, "logits_per_char": -0.7917741537094116, "bits_per_byte": 1.1422886450614447, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 512, "native_id": "TAKS_2009_5_14", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2035149335861206, "logits_per_token_corr": -1.2035149335861206, "logits_per_char_corr": -0.6017574667930603, "bits_per_byte_corr": 0.8681525131608769}, "model_output": [{"sum_logits": -1.2035149335861206, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.2035149335861206, "logits_per_char": -0.6017574667930603, "bits_per_byte": 0.8681525131608769, "num_chars": 2}, {"sum_logits": -1.307749629020691, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.307749629020691, "logits_per_char": -0.6538748145103455, "bits_per_byte": 0.943341952256944, "num_chars": 2}, {"sum_logits": -1.470347285270691, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.470347285270691, "logits_per_char": -0.7351736426353455, "bits_per_byte": 1.06063136842305, "num_chars": 2}, {"sum_logits": -1.7227734327316284, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.7227734327316284, "logits_per_char": -0.8613867163658142, "bits_per_byte": 1.2427183439894811, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 513, "native_id": "NYSEDREGENTS_2013_4_17", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7452986240386963, "logits_per_token_corr": -1.7452986240386963, "logits_per_char_corr": -0.8726493120193481, "bits_per_byte_corr": 1.2589668348863838}, "model_output": [{"sum_logits": -1.1101229190826416, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.1101229190826416, "logits_per_char": -0.5550614595413208, "bits_per_byte": 0.8007844150694289, "num_chars": 2}, {"sum_logits": -1.4217913150787354, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4217913150787354, "logits_per_char": -0.7108956575393677, "bits_per_byte": 1.0256056397222815, "num_chars": 2}, {"sum_logits": -1.4543344974517822, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4543344974517822, "logits_per_char": -0.7271672487258911, "bits_per_byte": 1.049080583634468, "num_chars": 2}, {"sum_logits": -1.7452986240386963, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.7452986240386963, "logits_per_char": -0.8726493120193481, "bits_per_byte": 1.2589668348863838, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 514, "native_id": "Mercury_403907", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4132709503173828, "logits_per_token_corr": -1.4132709503173828, "logits_per_char_corr": -0.7066354751586914, "bits_per_byte_corr": 1.019459495728393}, "model_output": [{"sum_logits": -1.3519001007080078, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.3519001007080078, "logits_per_char": -0.6759500503540039, "bits_per_byte": 0.9751897855350672, "num_chars": 2}, {"sum_logits": -1.080545425415039, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.080545425415039, "logits_per_char": -0.5402727127075195, "bits_per_byte": 0.7794487633513261, "num_chars": 2}, {"sum_logits": -1.4132709503173828, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4132709503173828, "logits_per_char": -0.7066354751586914, "bits_per_byte": 1.019459495728393, "num_chars": 2}, {"sum_logits": -1.9022407531738281, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.9022407531738281, "logits_per_char": -0.9511203765869141, "bits_per_byte": 1.3721766505913702, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 515, "native_id": "Mercury_7081480", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2553962469100952, "logits_per_token_corr": -1.2553962469100952, "logits_per_char_corr": -0.6276981234550476, "bits_per_byte_corr": 0.9055769698845562}, "model_output": [{"sum_logits": -1.2553962469100952, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.2553962469100952, "logits_per_char": -0.6276981234550476, "bits_per_byte": 0.9055769698845562, "num_chars": 2}, {"sum_logits": -1.197271704673767, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.197271704673767, "logits_per_char": -0.5986358523368835, "bits_per_byte": 0.8636489754653801, "num_chars": 2}, {"sum_logits": -1.434486746788025, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.434486746788025, "logits_per_char": -0.7172433733940125, "bits_per_byte": 1.0347634579067562, "num_chars": 2}, {"sum_logits": -1.8180679082870483, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.8180679082870483, "logits_per_char": -0.9090339541435242, "bits_per_byte": 1.3114587776434898, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 516, "native_id": "Mercury_416505", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2066938877105713, "logits_per_token_corr": -1.2066938877105713, "logits_per_char_corr": -0.6033469438552856, "bits_per_byte_corr": 0.8704456438361577}, "model_output": [{"sum_logits": -1.4154951572418213, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.4154951572418213, "logits_per_char": -0.7077475786209106, "bits_per_byte": 1.021063921878293, "num_chars": 2}, {"sum_logits": -1.2066938877105713, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.2066938877105713, "logits_per_char": -0.6033469438552856, "bits_per_byte": 0.8704456438361577, "num_chars": 2}, {"sum_logits": -1.4179918766021729, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.4179918766021729, "logits_per_char": -0.7089959383010864, "bits_per_byte": 1.0228649241981298, "num_chars": 2}, {"sum_logits": -1.6643717288970947, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.6643717288970947, "logits_per_char": -0.8321858644485474, "bits_per_byte": 1.200590419738677, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 517, "native_id": "Mercury_7041668", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7023242712020874, "logits_per_token_corr": -1.7023242712020874, "logits_per_char_corr": -0.8511621356010437, "bits_per_byte_corr": 1.2279673920249674}, "model_output": [{"sum_logits": -1.5091537237167358, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.5091537237167358, "logits_per_char": -0.7545768618583679, "bits_per_byte": 1.0886242965734059, "num_chars": 2}, {"sum_logits": -1.207037329673767, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.207037329673767, "logits_per_char": -0.6035186648368835, "bits_per_byte": 0.8706933848447258, "num_chars": 2}, {"sum_logits": -1.2592166662216187, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.2592166662216187, "logits_per_char": -0.6296083331108093, "bits_per_byte": 0.9083328198819838, "num_chars": 2}, {"sum_logits": -1.7023242712020874, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.7023242712020874, "logits_per_char": -0.8511621356010437, "bits_per_byte": 1.2279673920249674, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 518, "native_id": "Mercury_SC_401309", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4999810457229614, "logits_per_token_corr": -1.4999810457229614, "logits_per_char_corr": -0.7499905228614807, "bits_per_byte_corr": 1.0820076080467562}, "model_output": [{"sum_logits": -1.3435059785842896, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3435059785842896, "logits_per_char": -0.6717529892921448, "bits_per_byte": 0.9691347063548105, "num_chars": 2}, {"sum_logits": -1.2801648378372192, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.2801648378372192, "logits_per_char": -0.6400824189186096, "bits_per_byte": 0.9234437315347536, "num_chars": 2}, {"sum_logits": -1.4999810457229614, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4999810457229614, "logits_per_char": -0.7499905228614807, "bits_per_byte": 1.0820076080467562, "num_chars": 2}, {"sum_logits": -1.5219022035598755, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.5219022035598755, "logits_per_char": -0.7609511017799377, "bits_per_byte": 1.0978203808976978, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 519, "native_id": "NYSEDREGENTS_2010_4_1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2745704650878906, "logits_per_token_corr": -1.2745704650878906, "logits_per_char_corr": -0.6372852325439453, "bits_per_byte_corr": 0.9194082446235803}, "model_output": [{"sum_logits": -1.3529853820800781, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3529853820800781, "logits_per_char": -0.6764926910400391, "bits_per_byte": 0.9759726505617952, "num_chars": 2}, {"sum_logits": -1.2745704650878906, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.2745704650878906, "logits_per_char": -0.6372852325439453, "bits_per_byte": 0.9194082446235803, "num_chars": 2}, {"sum_logits": -1.3360137939453125, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3360137939453125, "logits_per_char": -0.6680068969726562, "bits_per_byte": 0.9637302375427682, "num_chars": 2}, {"sum_logits": -1.6915740966796875, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.6915740966796875, "logits_per_char": -0.8457870483398438, "bits_per_byte": 1.2202127802888831, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 520, "native_id": "ACTAAP_2007_7_36", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3237643241882324, "logits_per_token_corr": -1.3237643241882324, "logits_per_char_corr": -0.6618821620941162, "bits_per_byte_corr": 0.9548941129067325}, "model_output": [{"sum_logits": -1.4645495414733887, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4645495414733887, "logits_per_char": -0.7322747707366943, "bits_per_byte": 1.0564491803106906, "num_chars": 2}, {"sum_logits": -1.2642035484313965, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.2642035484313965, "logits_per_char": -0.6321017742156982, "bits_per_byte": 0.9119300949987582, "num_chars": 2}, {"sum_logits": -1.3237643241882324, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3237643241882324, "logits_per_char": -0.6618821620941162, "bits_per_byte": 0.9548941129067325, "num_chars": 2}, {"sum_logits": -1.6443305015563965, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.6443305015563965, "logits_per_char": -0.8221652507781982, "bits_per_byte": 1.1861337300897896, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 521, "native_id": "VASoL_2009_3_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.837404727935791, "logits_per_token_corr": -1.837404727935791, "logits_per_char_corr": -0.9187023639678955, "bits_per_byte_corr": 1.3254073445504027}, "model_output": [{"sum_logits": -1.4048504829406738, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4048504829406738, "logits_per_char": -0.7024252414703369, "bits_per_byte": 1.0133854124652157, "num_chars": 2}, {"sum_logits": -1.149782657623291, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.149782657623291, "logits_per_char": -0.5748913288116455, "bits_per_byte": 0.8293928691272233, "num_chars": 2}, {"sum_logits": -1.3280205726623535, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3280205726623535, "logits_per_char": -0.6640102863311768, "bits_per_byte": 0.9579643471899376, "num_chars": 2}, {"sum_logits": -1.837404727935791, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.837404727935791, "logits_per_char": -0.9187023639678955, "bits_per_byte": 1.3254073445504027, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 522, "native_id": "Mercury_7085295", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3673433065414429, "logits_per_token_corr": -1.3673433065414429, "logits_per_char_corr": -0.6836716532707214, "bits_per_byte_corr": 0.9863297037707373}, "model_output": [{"sum_logits": -1.2687925100326538, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.2687925100326538, "logits_per_char": -0.6343962550163269, "bits_per_byte": 0.9152403310712426, "num_chars": 2}, {"sum_logits": -1.1407636404037476, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.1407636404037476, "logits_per_char": -0.5703818202018738, "bits_per_byte": 0.8228870234190548, "num_chars": 2}, {"sum_logits": -1.3673433065414429, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3673433065414429, "logits_per_char": -0.6836716532707214, "bits_per_byte": 0.9863297037707373, "num_chars": 2}, {"sum_logits": -2.0566020011901855, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -2.0566020011901855, "logits_per_char": -1.0283010005950928, "bits_per_byte": 1.4835247541007652, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 523, "native_id": "Mercury_7201968", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6919292211532593, "logits_per_token_corr": -1.6919292211532593, "logits_per_char_corr": -0.8459646105766296, "bits_per_byte_corr": 1.2204689484473434}, "model_output": [{"sum_logits": -1.6919292211532593, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.6919292211532593, "logits_per_char": -0.8459646105766296, "bits_per_byte": 1.2204689484473434, "num_chars": 2}, {"sum_logits": -1.002447485923767, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": true, "logits_per_token": -1.002447485923767, "logits_per_char": -0.5012237429618835, "bits_per_byte": 0.7231130083474334, "num_chars": 2}, {"sum_logits": -1.4370540380477905, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.4370540380477905, "logits_per_char": -0.7185270190238953, "bits_per_byte": 1.0366153670912484, "num_chars": 2}, {"sum_logits": -1.6044105291366577, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.6044105291366577, "logits_per_char": -0.8022052645683289, "bits_per_byte": 1.1573375569685784, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 524, "native_id": "Mercury_7214008", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3842766284942627, "logits_per_token_corr": -1.3842766284942627, "logits_per_char_corr": -0.6921383142471313, "bits_per_byte_corr": 0.9985445135743007}, "model_output": [{"sum_logits": -1.3191215991973877, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.3191215991973877, "logits_per_char": -0.6595607995986938, "bits_per_byte": 0.9515450947464786, "num_chars": 2}, {"sum_logits": -1.143195390701294, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.143195390701294, "logits_per_char": -0.571597695350647, "bits_per_byte": 0.8246411604665314, "num_chars": 2}, {"sum_logits": -1.3842766284942627, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.3842766284942627, "logits_per_char": -0.6921383142471313, "bits_per_byte": 0.9985445135743007, "num_chars": 2}, {"sum_logits": -1.8766262531280518, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.8766262531280518, "logits_per_char": -0.9383131265640259, "bits_per_byte": 1.353699694495911, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 525, "native_id": "Mercury_176855", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4219956398010254, "logits_per_token_corr": -1.4219956398010254, "logits_per_char_corr": -0.7109978199005127, "bits_per_byte_corr": 1.025753028854071}, "model_output": [{"sum_logits": -1.5135579109191895, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.5135579109191895, "logits_per_char": -0.7567789554595947, "bits_per_byte": 1.0918012460914714, "num_chars": 2}, {"sum_logits": -1.4219956398010254, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4219956398010254, "logits_per_char": -0.7109978199005127, "bits_per_byte": 1.025753028854071, "num_chars": 2}, {"sum_logits": -1.103081226348877, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.103081226348877, "logits_per_char": -0.5515406131744385, "bits_per_byte": 0.7957049074761922, "num_chars": 2}, {"sum_logits": -1.6535954475402832, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.6535954475402832, "logits_per_char": -0.8267977237701416, "bits_per_byte": 1.1928169759023233, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 526, "native_id": "Mercury_SC_401678", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1646162271499634, "logits_per_token_corr": -1.1646162271499634, "logits_per_char_corr": -0.5823081135749817, "bits_per_byte_corr": 0.8400930277246369}, "model_output": [{"sum_logits": -1.358650803565979, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.358650803565979, "logits_per_char": -0.6793254017829895, "bits_per_byte": 0.9800593883029256, "num_chars": 2}, {"sum_logits": -1.1646162271499634, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.1646162271499634, "logits_per_char": -0.5823081135749817, "bits_per_byte": 0.8400930277246369, "num_chars": 2}, {"sum_logits": -1.3107115030288696, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.3107115030288696, "logits_per_char": -0.6553557515144348, "bits_per_byte": 0.9454784927286142, "num_chars": 2}, {"sum_logits": -1.9552370309829712, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.9552370309829712, "logits_per_char": -0.9776185154914856, "bits_per_byte": 1.4104053841818098, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 527, "native_id": "Mercury_417143", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6568189859390259, "logits_per_token_corr": -1.6568189859390259, "logits_per_char_corr": -0.8284094929695129, "bits_per_byte_corr": 1.1951422673333156}, "model_output": [{"sum_logits": -1.7875810861587524, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.7875810861587524, "logits_per_char": -0.8937905430793762, "bits_per_byte": 1.2894671840949958, "num_chars": 2}, {"sum_logits": -1.0050607919692993, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.0050607919692993, "logits_per_char": -0.5025303959846497, "bits_per_byte": 0.724998110183542, "num_chars": 2}, {"sum_logits": -1.374376893043518, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.374376893043518, "logits_per_char": -0.687188446521759, "bits_per_byte": 0.9914033639538447, "num_chars": 2}, {"sum_logits": -1.6568189859390259, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.6568189859390259, "logits_per_char": -0.8284094929695129, "bits_per_byte": 1.1951422673333156, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 528, "native_id": "NYSEDREGENTS_2013_4_21", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3452907800674438, "logits_per_token_corr": -1.3452907800674438, "logits_per_char_corr": -0.6726453900337219, "bits_per_byte_corr": 0.9704221684791704}, "model_output": [{"sum_logits": -1.5756317377090454, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.5756317377090454, "logits_per_char": -0.7878158688545227, "bits_per_byte": 1.1365780471308664, "num_chars": 2}, {"sum_logits": -1.5060440301895142, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.5060440301895142, "logits_per_char": -0.7530220150947571, "bits_per_byte": 1.0863811268582007, "num_chars": 2}, {"sum_logits": -1.2213855981826782, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.2213855981826782, "logits_per_char": -0.6106927990913391, "bits_per_byte": 0.881043472756308, "num_chars": 2}, {"sum_logits": -1.3452907800674438, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3452907800674438, "logits_per_char": -0.6726453900337219, "bits_per_byte": 0.9704221684791704, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 529, "native_id": "Mercury_7032620", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2748770713806152, "logits_per_token_corr": -1.2748770713806152, "logits_per_char_corr": -0.6374385356903076, "bits_per_byte_corr": 0.91962941431259}, "model_output": [{"sum_logits": -1.200223445892334, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.200223445892334, "logits_per_char": -0.600111722946167, "bits_per_byte": 0.8657782066743887, "num_chars": 2}, {"sum_logits": -1.2748770713806152, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.2748770713806152, "logits_per_char": -0.6374385356903076, "bits_per_byte": 0.91962941431259, "num_chars": 2}, {"sum_logits": -1.3183379173278809, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3183379173278809, "logits_per_char": -0.6591689586639404, "bits_per_byte": 0.9509797877730922, "num_chars": 2}, {"sum_logits": -2.048398494720459, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -2.048398494720459, "logits_per_char": -1.0241992473602295, "bits_per_byte": 1.4776071750498734, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 530, "native_id": "NYSEDREGENTS_2008_8_9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6770212650299072, "logits_per_token_corr": -1.6770212650299072, "logits_per_char_corr": -0.8385106325149536, "bits_per_byte_corr": 1.2097151312628607}, "model_output": [{"sum_logits": -1.2484877109527588, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.2484877109527588, "logits_per_char": -0.6242438554763794, "bits_per_byte": 0.9005935146018264, "num_chars": 2}, {"sum_logits": -1.3201258182525635, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3201258182525635, "logits_per_char": -0.6600629091262817, "bits_per_byte": 0.9522694856719133, "num_chars": 2}, {"sum_logits": -1.3988401889801025, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3988401889801025, "logits_per_char": -0.6994200944900513, "bits_per_byte": 1.0090499018196122, "num_chars": 2}, {"sum_logits": -1.6770212650299072, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.6770212650299072, "logits_per_char": -0.8385106325149536, "bits_per_byte": 1.2097151312628607, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 531, "native_id": "TAKS_2009_8_27", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3493751287460327, "logits_per_token_corr": -1.3493751287460327, "logits_per_char_corr": -0.6746875643730164, "bits_per_byte_corr": 0.9733684032711033}, "model_output": [{"sum_logits": -1.307228446006775, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.307228446006775, "logits_per_char": -0.6536142230033875, "bits_per_byte": 0.9429659981821575, "num_chars": 2}, {"sum_logits": -1.0398849248886108, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.0398849248886108, "logits_per_char": -0.5199424624443054, "bits_per_byte": 0.7501184121165344, "num_chars": 2}, {"sum_logits": -1.3493751287460327, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.3493751287460327, "logits_per_char": -0.6746875643730164, "bits_per_byte": 0.9733684032711033, "num_chars": 2}, {"sum_logits": -2.2385506629943848, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -2.2385506629943848, "logits_per_char": -1.1192753314971924, "bits_per_byte": 1.6147729701415101, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 532, "native_id": "NCEOGA_2013_8_57", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4028356075286865, "logits_per_token_corr": -1.4028356075286865, "logits_per_char_corr": -0.7014178037643433, "bits_per_byte_corr": 1.0119319870827732}, "model_output": [{"sum_logits": -1.4277875423431396, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4277875423431396, "logits_per_char": -0.7138937711715698, "bits_per_byte": 1.0299310033914841, "num_chars": 2}, {"sum_logits": -0.9908597469329834, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -0.9908597469329834, "logits_per_char": -0.4954298734664917, "bits_per_byte": 0.7147542215588677, "num_chars": 2}, {"sum_logits": -1.4028356075286865, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4028356075286865, "logits_per_char": -0.7014178037643433, "bits_per_byte": 1.0119319870827732, "num_chars": 2}, {"sum_logits": -2.0198419094085693, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -2.0198419094085693, "logits_per_char": -1.0099209547042847, "bits_per_byte": 1.4570079530427658, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 533, "native_id": "Mercury_SC_413143", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.368543028831482, "logits_per_token_corr": -1.368543028831482, "logits_per_char_corr": -0.684271514415741, "bits_per_byte_corr": 0.9871951204698796}, "model_output": [{"sum_logits": -1.2115854024887085, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.2115854024887085, "logits_per_char": -0.6057927012443542, "bits_per_byte": 0.8739741258925872, "num_chars": 2}, {"sum_logits": -1.2764314413070679, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.2764314413070679, "logits_per_char": -0.6382157206535339, "bits_per_byte": 0.9207506552048909, "num_chars": 2}, {"sum_logits": -1.368543028831482, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.368543028831482, "logits_per_char": -0.684271514415741, "bits_per_byte": 0.9871951204698796, "num_chars": 2}, {"sum_logits": -1.9275392293930054, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.9275392293930054, "logits_per_char": -0.9637696146965027, "bits_per_byte": 1.3904256436831104, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 534, "native_id": "Mercury_401195", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.545130729675293, "logits_per_token_corr": -1.545130729675293, "logits_per_char_corr": -0.7725653648376465, "bits_per_byte_corr": 1.1145762206146461}, "model_output": [{"sum_logits": -1.412907600402832, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.412907600402832, "logits_per_char": -0.706453800201416, "bits_per_byte": 1.0191973941684778, "num_chars": 2}, {"sum_logits": -1.3680601119995117, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3680601119995117, "logits_per_char": -0.6840300559997559, "bits_per_byte": 0.9868467696105567, "num_chars": 2}, {"sum_logits": -1.3150262832641602, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.3150262832641602, "logits_per_char": -0.6575131416320801, "bits_per_byte": 0.9485909487526061, "num_chars": 2}, {"sum_logits": -1.545130729675293, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.545130729675293, "logits_per_char": -0.7725653648376465, "bits_per_byte": 1.1145762206146461, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 535, "native_id": "CSZ10358", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2842694520950317, "logits_per_token_corr": -1.2842694520950317, "logits_per_char_corr": -0.6421347260475159, "bits_per_byte_corr": 0.9264045848520098}, "model_output": [{"sum_logits": -1.2842694520950317, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.2842694520950317, "logits_per_char": -0.6421347260475159, "bits_per_byte": 0.9264045848520098, "num_chars": 2}, {"sum_logits": -1.3779069185256958, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3779069185256958, "logits_per_char": -0.6889534592628479, "bits_per_byte": 0.9939497390825213, "num_chars": 2}, {"sum_logits": -1.280380368232727, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.280380368232727, "logits_per_char": -0.6401901841163635, "bits_per_byte": 0.9235992038511336, "num_chars": 2}, {"sum_logits": -1.791122555732727, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.791122555732727, "logits_per_char": -0.8955612778663635, "bits_per_byte": 1.292021814390914, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 536, "native_id": "MCAS_1999_4_26", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3518791198730469, "logits_per_token_corr": -1.3518791198730469, "logits_per_char_corr": -0.6759395599365234, "bits_per_byte_corr": 0.9751746510617912}, "model_output": [{"sum_logits": -1.4297657012939453, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4297657012939453, "logits_per_char": -0.7148828506469727, "bits_per_byte": 1.0313579434456939, "num_chars": 2}, {"sum_logits": -1.2481975555419922, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2481975555419922, "logits_per_char": -0.6240987777709961, "bits_per_byte": 0.9003842117157261, "num_chars": 2}, {"sum_logits": -1.3518791198730469, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3518791198730469, "logits_per_char": -0.6759395599365234, "bits_per_byte": 0.9751746510617912, "num_chars": 2}, {"sum_logits": -1.619729995727539, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.619729995727539, "logits_per_char": -0.8098649978637695, "bits_per_byte": 1.1683882162084507, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 537, "native_id": "AKDE&ED_2008_8_36", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4349417686462402, "logits_per_token_corr": -1.4349417686462402, "logits_per_char_corr": -0.7174708843231201, "bits_per_byte_corr": 1.0350916867959281}, "model_output": [{"sum_logits": -1.4349417686462402, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4349417686462402, "logits_per_char": -0.7174708843231201, "bits_per_byte": 1.0350916867959281, "num_chars": 2}, {"sum_logits": -1.4316725730895996, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4316725730895996, "logits_per_char": -0.7158362865447998, "bits_per_byte": 1.0327334606872955, "num_chars": 2}, {"sum_logits": -1.4290289878845215, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4290289878845215, "logits_per_char": -0.7145144939422607, "bits_per_byte": 1.0308265170545274, "num_chars": 2}, {"sum_logits": -1.305346965789795, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.305346965789795, "logits_per_char": -0.6526734828948975, "bits_per_byte": 0.9416087970928727, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 538, "native_id": "Mercury_7017938", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.424497127532959, "logits_per_token_corr": -1.424497127532959, "logits_per_char_corr": -0.7122485637664795, "bits_per_byte_corr": 1.0275574708269248}, "model_output": [{"sum_logits": -1.3281245231628418, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.3281245231628418, "logits_per_char": -0.6640622615814209, "bits_per_byte": 0.9580393316257139, "num_chars": 2}, {"sum_logits": -1.424497127532959, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.424497127532959, "logits_per_char": -0.7122485637664795, "bits_per_byte": 1.0275574708269248, "num_chars": 2}, {"sum_logits": -1.4483428001403809, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4483428001403809, "logits_per_char": -0.7241714000701904, "bits_per_byte": 1.044758487635632, "num_chars": 2}, {"sum_logits": -1.4032092094421387, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4032092094421387, "logits_per_char": -0.7016046047210693, "bits_per_byte": 1.0122014838966753, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 539, "native_id": "MDSA_2013_8_32", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.733246088027954, "logits_per_token_corr": -1.733246088027954, "logits_per_char_corr": -0.866623044013977, "bits_per_byte_corr": 1.2502727679199608}, "model_output": [{"sum_logits": -1.1683638095855713, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.1683638095855713, "logits_per_char": -0.5841819047927856, "bits_per_byte": 0.8427963370222259, "num_chars": 2}, {"sum_logits": -1.2534277439117432, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.2534277439117432, "logits_per_char": -0.6267138719558716, "bits_per_byte": 0.9041569951277063, "num_chars": 2}, {"sum_logits": -1.5446875095367432, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.5446875095367432, "logits_per_char": -0.7723437547683716, "bits_per_byte": 1.114256504866692, "num_chars": 2}, {"sum_logits": -1.733246088027954, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.733246088027954, "logits_per_char": -0.866623044013977, "bits_per_byte": 1.2502727679199608, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 540, "native_id": "Mercury_7038028", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4510334730148315, "logits_per_token_corr": -1.4510334730148315, "logits_per_char_corr": -0.7255167365074158, "bits_per_byte_corr": 1.0466993978419454}, "model_output": [{"sum_logits": -1.3251980543136597, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3251980543136597, "logits_per_char": -0.6625990271568298, "bits_per_byte": 0.9559283305776968, "num_chars": 2}, {"sum_logits": -1.1319321393966675, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.1319321393966675, "logits_per_char": -0.5659660696983337, "bits_per_byte": 0.8165164420657901, "num_chars": 2}, {"sum_logits": -1.4510334730148315, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4510334730148315, "logits_per_char": -0.7255167365074158, "bits_per_byte": 1.0466993978419454, "num_chars": 2}, {"sum_logits": -1.8519734144210815, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.8519734144210815, "logits_per_char": -0.9259867072105408, "bits_per_byte": 1.3359164304227076, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 541, "native_id": "Mercury_7057103", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3811273574829102, "logits_per_token_corr": -1.3811273574829102, "logits_per_char_corr": -0.6905636787414551, "bits_per_byte_corr": 0.9962727947390523}, "model_output": [{"sum_logits": -1.602768898010254, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.602768898010254, "logits_per_char": -0.801384449005127, "bits_per_byte": 1.1561533704260616, "num_chars": 2}, {"sum_logits": -1.1046075820922852, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.1046075820922852, "logits_per_char": -0.5523037910461426, "bits_per_byte": 0.7968059404070166, "num_chars": 2}, {"sum_logits": -1.3811273574829102, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.3811273574829102, "logits_per_char": -0.6905636787414551, "bits_per_byte": 0.9962727947390523, "num_chars": 2}, {"sum_logits": -1.595707893371582, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.595707893371582, "logits_per_char": -0.797853946685791, "bits_per_byte": 1.151059932238105, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 542, "native_id": "NYSEDREGENTS_2008_4_26", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.344414234161377, "logits_per_token_corr": -1.344414234161377, "logits_per_char_corr": -0.6722071170806885, "bits_per_byte_corr": 0.9697898742632728}, "model_output": [{"sum_logits": -1.2849507331848145, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.2849507331848145, "logits_per_char": -0.6424753665924072, "bits_per_byte": 0.9268960252768507, "num_chars": 2}, {"sum_logits": -1.099764347076416, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.099764347076416, "logits_per_char": -0.549882173538208, "bits_per_byte": 0.793312284837387, "num_chars": 2}, {"sum_logits": -1.344414234161377, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.344414234161377, "logits_per_char": -0.6722071170806885, "bits_per_byte": 0.9697898742632728, "num_chars": 2}, {"sum_logits": -2.1171021461486816, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -2.1171021461486816, "logits_per_char": -1.0585510730743408, "bits_per_byte": 1.5271663836531395, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 543, "native_id": "Mercury_417117", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6804113388061523, "logits_per_token_corr": -1.6804113388061523, "logits_per_char_corr": -0.8402056694030762, "bits_per_byte_corr": 1.2121605525754806}, "model_output": [{"sum_logits": -1.465937614440918, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": false, "logits_per_token": -1.465937614440918, "logits_per_char": -0.732968807220459, "bits_per_byte": 1.0574504633040145, "num_chars": 2}, {"sum_logits": -1.1738595962524414, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": true, "logits_per_token": -1.1738595962524414, "logits_per_char": -0.5869297981262207, "bits_per_byte": 0.8467607091072673, "num_chars": 2}, {"sum_logits": -1.6804113388061523, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": false, "logits_per_token": -1.6804113388061523, "logits_per_char": -0.8402056694030762, "bits_per_byte": 1.2121605525754806, "num_chars": 2}, {"sum_logits": -1.4625520706176758, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": false, "logits_per_token": -1.4625520706176758, "logits_per_char": -0.7312760353088379, "bits_per_byte": 1.0550083096617608, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 544, "native_id": "MCAS_2016_8_15", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2370073795318604, "logits_per_token_corr": -1.2370073795318604, "logits_per_char_corr": -0.6185036897659302, "bits_per_byte_corr": 0.8923122059974744}, "model_output": [{"sum_logits": -1.8142588138580322, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.8142588138580322, "logits_per_char": -0.9071294069290161, "bits_per_byte": 1.3087110968219782, "num_chars": 2}, {"sum_logits": -1.0947821140289307, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": true, "logits_per_token": -1.0947821140289307, "logits_per_char": -0.5473910570144653, "bits_per_byte": 0.7897183633823043, "num_chars": 2}, {"sum_logits": -1.2370073795318604, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.2370073795318604, "logits_per_char": -0.6185036897659302, "bits_per_byte": 0.8923122059974744, "num_chars": 2}, {"sum_logits": -1.5983507633209229, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.5983507633209229, "logits_per_char": -0.7991753816604614, "bits_per_byte": 1.1529663599229207, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 545, "native_id": "Mercury_400780", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6257543563842773, "logits_per_token_corr": -1.6257543563842773, "logits_per_char_corr": -0.8128771781921387, "bits_per_byte_corr": 1.1727338738304551}, "model_output": [{"sum_logits": -1.3935251235961914, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3935251235961914, "logits_per_char": -0.6967625617980957, "bits_per_byte": 1.0052158925839247, "num_chars": 2}, {"sum_logits": -1.2939081192016602, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.2939081192016602, "logits_per_char": -0.6469540596008301, "bits_per_byte": 0.933357413469771, "num_chars": 2}, {"sum_logits": -1.6257543563842773, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.6257543563842773, "logits_per_char": -0.8128771781921387, "bits_per_byte": 1.1727338738304551, "num_chars": 2}, {"sum_logits": -1.7571783065795898, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.7571783065795898, "logits_per_char": -0.8785891532897949, "bits_per_byte": 1.267536214430931, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 546, "native_id": "NYSEDREGENTS_2008_8_32", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1938586235046387, "logits_per_token_corr": -1.1938586235046387, "logits_per_char_corr": -0.5969293117523193, "bits_per_byte_corr": 0.8611869578269519}, "model_output": [{"sum_logits": -1.1938586235046387, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.1938586235046387, "logits_per_char": -0.5969293117523193, "bits_per_byte": 0.8611869578269519, "num_chars": 2}, {"sum_logits": -1.177466869354248, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.177466869354248, "logits_per_char": -0.588733434677124, "bits_per_byte": 0.8493628066148236, "num_chars": 2}, {"sum_logits": -1.4398226737976074, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4398226737976074, "logits_per_char": -0.7199113368988037, "bits_per_byte": 1.0386125156243942, "num_chars": 2}, {"sum_logits": -2.021270275115967, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -2.021270275115967, "logits_per_char": -1.0106351375579834, "bits_per_byte": 1.4580383011040856, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 547, "native_id": "Mercury_SC_416104", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.336482286453247, "logits_per_token_corr": -1.336482286453247, "logits_per_char_corr": -0.6682411432266235, "bits_per_byte_corr": 0.964068183451714}, "model_output": [{"sum_logits": -1.2174732685089111, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.2174732685089111, "logits_per_char": -0.6087366342544556, "bits_per_byte": 0.8782213234469727, "num_chars": 2}, {"sum_logits": -1.3464939594268799, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3464939594268799, "logits_per_char": -0.6732469797134399, "bits_per_byte": 0.9712900789267501, "num_chars": 2}, {"sum_logits": -1.336482286453247, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.336482286453247, "logits_per_char": -0.6682411432266235, "bits_per_byte": 0.964068183451714, "num_chars": 2}, {"sum_logits": -1.7715637683868408, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.7715637683868408, "logits_per_char": -0.8857818841934204, "bits_per_byte": 1.277913131636048, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 548, "native_id": "Mercury_416646", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3312095403671265, "logits_per_token_corr": -1.3312095403671265, "logits_per_char_corr": -0.6656047701835632, "bits_per_byte_corr": 0.9602647011365547}, "model_output": [{"sum_logits": -1.5889304876327515, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.5889304876327515, "logits_per_char": -0.7944652438163757, "bits_per_byte": 1.1461710674133498, "num_chars": 2}, {"sum_logits": -1.3312095403671265, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.3312095403671265, "logits_per_char": -0.6656047701835632, "bits_per_byte": 0.9602647011365547, "num_chars": 2}, {"sum_logits": -1.229692816734314, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": true, "logits_per_token": -1.229692816734314, "logits_per_char": -0.614846408367157, "bits_per_byte": 0.8870358642603251, "num_chars": 2}, {"sum_logits": -1.5577529668807983, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.5577529668807983, "logits_per_char": -0.7788764834403992, "bits_per_byte": 1.123681240125306, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 549, "native_id": "Mercury_SC_405296", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2100224494934082, "logits_per_token_corr": -1.2100224494934082, "logits_per_char_corr": -0.6050112247467041, "bits_per_byte_corr": 0.8728466936248552}, "model_output": [{"sum_logits": -1.687544345855713, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.687544345855713, "logits_per_char": -0.8437721729278564, "bits_per_byte": 1.2173059295239979, "num_chars": 2}, {"sum_logits": -1.2280278205871582, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.2280278205871582, "logits_per_char": -0.6140139102935791, "bits_per_byte": 0.8858348234180238, "num_chars": 2}, {"sum_logits": -1.2100224494934082, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.2100224494934082, "logits_per_char": -0.6050112247467041, "bits_per_byte": 0.8728466936248552, "num_chars": 2}, {"sum_logits": -1.5585236549377441, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.5585236549377441, "logits_per_char": -0.7792618274688721, "bits_per_byte": 1.1242371740442205, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 550, "native_id": "MCAS_2006_8_31", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6310621500015259, "logits_per_token_corr": -1.6310621500015259, "logits_per_char_corr": -0.8155310750007629, "bits_per_byte_corr": 1.1765626375952913}, "model_output": [{"sum_logits": -1.3871465921401978, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3871465921401978, "logits_per_char": -0.6935732960700989, "bits_per_byte": 1.0006147547340634, "num_chars": 2}, {"sum_logits": -1.2780004739761353, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.2780004739761353, "logits_per_char": -0.6390002369880676, "bits_per_byte": 0.9218824730302199, "num_chars": 2}, {"sum_logits": -1.3689714670181274, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3689714670181274, "logits_per_char": -0.6844857335090637, "bits_per_byte": 0.9875041732934803, "num_chars": 2}, {"sum_logits": -1.6310621500015259, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.6310621500015259, "logits_per_char": -0.8155310750007629, "bits_per_byte": 1.1765626375952913, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 551, "native_id": "MCAS_2015_5_14", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6083405017852783, "logits_per_token_corr": -1.6083405017852783, "logits_per_char_corr": -0.8041702508926392, "bits_per_byte_corr": 1.1601724329940775}, "model_output": [{"sum_logits": -1.6083405017852783, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.6083405017852783, "logits_per_char": -0.8041702508926392, "bits_per_byte": 1.1601724329940775, "num_chars": 2}, {"sum_logits": -1.2553818225860596, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.2553818225860596, "logits_per_char": -0.6276909112930298, "bits_per_byte": 0.9055665649341789, "num_chars": 2}, {"sum_logits": -1.2438442707061768, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.2438442707061768, "logits_per_char": -0.6219221353530884, "bits_per_byte": 0.8972439804936199, "num_chars": 2}, {"sum_logits": -1.555445909500122, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.555445909500122, "logits_per_char": -0.777722954750061, "bits_per_byte": 1.1220170500042308, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 552, "native_id": "Mercury_417465", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3928449153900146, "logits_per_token_corr": -1.3928449153900146, "logits_per_char_corr": -0.6964224576950073, "bits_per_byte_corr": 1.004725226081013}, "model_output": [{"sum_logits": -1.4915978908538818, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4915978908538818, "logits_per_char": -0.7457989454269409, "bits_per_byte": 1.0759604400684393, "num_chars": 2}, {"sum_logits": -1.1940114498138428, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.1940114498138428, "logits_per_char": -0.5970057249069214, "bits_per_byte": 0.861297198706155, "num_chars": 2}, {"sum_logits": -1.3928449153900146, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3928449153900146, "logits_per_char": -0.6964224576950073, "bits_per_byte": 1.004725226081013, "num_chars": 2}, {"sum_logits": -1.6159417629241943, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.6159417629241943, "logits_per_char": -0.8079708814620972, "bits_per_byte": 1.1656555838688896, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 553, "native_id": "MCAS_1998_4_19", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.0775036811828613, "logits_per_token_corr": -2.0775036811828613, "logits_per_char_corr": -1.0387518405914307, "bits_per_byte_corr": 1.4986021291366167}, "model_output": [{"sum_logits": -1.3520454168319702, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3520454168319702, "logits_per_char": -0.6760227084159851, "bits_per_byte": 0.9752946089607681, "num_chars": 2}, {"sum_logits": -1.0214256048202515, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.0214256048202515, "logits_per_char": -0.5107128024101257, "bits_per_byte": 0.7368028273561228, "num_chars": 2}, {"sum_logits": -1.5474761724472046, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.5474761724472046, "logits_per_char": -0.7737380862236023, "bits_per_byte": 1.11626809994251, "num_chars": 2}, {"sum_logits": -2.0775036811828613, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -2.0775036811828613, "logits_per_char": -1.0387518405914307, "bits_per_byte": 1.4986021291366167, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 554, "native_id": "Mercury_7214778", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8395874500274658, "logits_per_token_corr": -1.8395874500274658, "logits_per_char_corr": -0.9197937250137329, "bits_per_byte_corr": 1.3269818457190528}, "model_output": [{"sum_logits": -1.278601884841919, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.278601884841919, "logits_per_char": -0.6393009424209595, "bits_per_byte": 0.9223162992670216, "num_chars": 2}, {"sum_logits": -1.3071033954620361, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3071033954620361, "logits_per_char": -0.6535516977310181, "bits_per_byte": 0.94287579328178, "num_chars": 2}, {"sum_logits": -1.2868893146514893, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.2868893146514893, "logits_per_char": -0.6434446573257446, "bits_per_byte": 0.9282944162110172, "num_chars": 2}, {"sum_logits": -1.8395874500274658, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.8395874500274658, "logits_per_char": -0.9197937250137329, "bits_per_byte": 1.3269818457190528, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 555, "native_id": "Mercury_7123393", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8365387916564941, "logits_per_token_corr": -1.8365387916564941, "logits_per_char_corr": -0.9182693958282471, "bits_per_byte_corr": 1.3247827035624684}, "model_output": [{"sum_logits": -1.6046509742736816, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.6046509742736816, "logits_per_char": -0.8023254871368408, "bits_per_byte": 1.1575110014719736, "num_chars": 2}, {"sum_logits": -1.03318452835083, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.03318452835083, "logits_per_char": -0.516592264175415, "bits_per_byte": 0.745285097688008, "num_chars": 2}, {"sum_logits": -1.324965000152588, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.324965000152588, "logits_per_char": -0.662482500076294, "bits_per_byte": 0.9557602175364782, "num_chars": 2}, {"sum_logits": -1.8365387916564941, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.8365387916564941, "logits_per_char": -0.9182693958282471, "bits_per_byte": 1.3247827035624684, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 556, "native_id": "Mercury_7207550", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7480827569961548, "logits_per_token_corr": -1.7480827569961548, "logits_per_char_corr": -0.8740413784980774, "bits_per_byte_corr": 1.2609751622918357}, "model_output": [{"sum_logits": -1.373761773109436, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": false, "logits_per_token": -1.373761773109436, "logits_per_char": -0.686880886554718, "bits_per_byte": 0.9909596487146183, "num_chars": 2}, {"sum_logits": -1.1820179224014282, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": true, "logits_per_token": -1.1820179224014282, "logits_per_char": -0.5910089612007141, "bits_per_byte": 0.8526456974458206, "num_chars": 2}, {"sum_logits": -1.379153847694397, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": false, "logits_per_token": -1.379153847694397, "logits_per_char": -0.6895769238471985, "bits_per_byte": 0.9948492083465343, "num_chars": 2}, {"sum_logits": -1.7480827569961548, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": false, "logits_per_token": -1.7480827569961548, "logits_per_char": -0.8740413784980774, "bits_per_byte": 1.2609751622918357, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 557, "native_id": "Mercury_SC_405827", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.209797978401184, "logits_per_token_corr": -1.209797978401184, "logits_per_char_corr": -0.604898989200592, "bits_per_byte_corr": 0.8726847719590677}, "model_output": [{"sum_logits": -1.0357104539871216, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": true, "logits_per_token": -1.0357104539871216, "logits_per_char": -0.5178552269935608, "bits_per_byte": 0.7471071678825754, "num_chars": 2}, {"sum_logits": -1.209797978401184, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.209797978401184, "logits_per_char": -0.604898989200592, "bits_per_byte": 0.8726847719590677, "num_chars": 2}, {"sum_logits": -1.6419745683670044, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.6419745683670044, "logits_per_char": -0.8209872841835022, "bits_per_byte": 1.1844342835252877, "num_chars": 2}, {"sum_logits": -1.9589205980300903, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.9589205980300903, "logits_per_char": -0.9794602990150452, "bits_per_byte": 1.413062516137642, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 558, "native_id": "NYSEDREGENTS_2015_4_11", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.366479516029358, "logits_per_token_corr": -1.366479516029358, "logits_per_char_corr": -0.683239758014679, "bits_per_byte_corr": 0.9857066106266609}, "model_output": [{"sum_logits": -1.3823410272598267, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3823410272598267, "logits_per_char": -0.6911705136299133, "bits_per_byte": 0.9971482724232701, "num_chars": 2}, {"sum_logits": -1.3868862390518188, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3868862390518188, "logits_per_char": -0.6934431195259094, "bits_per_byte": 1.000426949679321, "num_chars": 2}, {"sum_logits": -1.477716088294983, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.477716088294983, "logits_per_char": -0.7388580441474915, "bits_per_byte": 1.0659468362132705, "num_chars": 2}, {"sum_logits": -1.366479516029358, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.366479516029358, "logits_per_char": -0.683239758014679, "bits_per_byte": 0.9857066106266609, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 559, "native_id": "Mercury_404097", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4389441013336182, "logits_per_token_corr": -1.4389441013336182, "logits_per_char_corr": -0.7194720506668091, "bits_per_byte_corr": 1.0379787595559642}, "model_output": [{"sum_logits": -1.2806036472320557, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.2806036472320557, "logits_per_char": -0.6403018236160278, "bits_per_byte": 0.9237602656036668, "num_chars": 2}, {"sum_logits": -1.25063157081604, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.25063157081604, "logits_per_char": -0.62531578540802, "bits_per_byte": 0.9021399825983858, "num_chars": 2}, {"sum_logits": -1.4389441013336182, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4389441013336182, "logits_per_char": -0.7194720506668091, "bits_per_byte": 1.0379787595559642, "num_chars": 2}, {"sum_logits": -1.7450697422027588, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.7450697422027588, "logits_per_char": -0.8725348711013794, "bits_per_byte": 1.2588017315415554, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 560, "native_id": "AIMS_2009_4_4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.469235897064209, "logits_per_token_corr": -1.469235897064209, "logits_per_char_corr": -0.7346179485321045, "bits_per_byte_corr": 1.0598296712960524}, "model_output": [{"sum_logits": -1.2680010795593262, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.2680010795593262, "logits_per_char": -0.6340005397796631, "bits_per_byte": 0.914669434661703, "num_chars": 2}, {"sum_logits": -0.9798426628112793, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -0.9798426628112793, "logits_per_char": -0.48992133140563965, "bits_per_byte": 0.7068070752451425, "num_chars": 2}, {"sum_logits": -1.469235897064209, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.469235897064209, "logits_per_char": -0.7346179485321045, "bits_per_byte": 1.0598296712960524, "num_chars": 2}, {"sum_logits": -2.2395377159118652, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -2.2395377159118652, "logits_per_char": -1.1197688579559326, "bits_per_byte": 1.6154849783160827, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 561, "native_id": "NCEOGA_2013_8_18", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5579291582107544, "logits_per_token_corr": -1.5579291582107544, "logits_per_char_corr": -0.7789645791053772, "bits_per_byte_corr": 1.1238083353042938}, "model_output": [{"sum_logits": -1.5579291582107544, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5579291582107544, "logits_per_char": -0.7789645791053772, "bits_per_byte": 1.1238083353042938, "num_chars": 2}, {"sum_logits": -1.1562186479568481, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.1562186479568481, "logits_per_char": -0.5781093239784241, "bits_per_byte": 0.8340354547959428, "num_chars": 2}, {"sum_logits": -1.3845969438552856, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3845969438552856, "logits_per_char": -0.6922984719276428, "bits_per_byte": 0.9987755722657351, "num_chars": 2}, {"sum_logits": -1.5707255601882935, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5707255601882935, "logits_per_char": -0.7853627800941467, "bits_per_byte": 1.133038988141409, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 562, "native_id": "Mercury_400884", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3680511713027954, "logits_per_token_corr": -1.3680511713027954, "logits_per_char_corr": -0.6840255856513977, "bits_per_byte_corr": 0.9868403202611493}, "model_output": [{"sum_logits": -1.4778457880020142, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4778457880020142, "logits_per_char": -0.7389228940010071, "bits_per_byte": 1.06604039477534, "num_chars": 2}, {"sum_logits": -1.3680511713027954, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.3680511713027954, "logits_per_char": -0.6840255856513977, "bits_per_byte": 0.9868403202611493, "num_chars": 2}, {"sum_logits": -1.5090175867080688, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5090175867080688, "logits_per_char": -0.7545087933540344, "bits_per_byte": 1.0885260944797632, "num_chars": 2}, {"sum_logits": -1.6577564477920532, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.6577564477920532, "logits_per_char": -0.8288782238960266, "bits_per_byte": 1.1958185031165085, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 563, "native_id": "Mercury_7219678", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2486913204193115, "logits_per_token_corr": -1.2486913204193115, "logits_per_char_corr": -0.6243456602096558, "bits_per_byte_corr": 0.9007403877856633}, "model_output": [{"sum_logits": -1.2486913204193115, "num_tokens": 1, "num_tokens_all": 422, "is_greedy": false, "logits_per_token": -1.2486913204193115, "logits_per_char": -0.6243456602096558, "bits_per_byte": 0.9007403877856633, "num_chars": 2}, {"sum_logits": -1.2307965755462646, "num_tokens": 1, "num_tokens_all": 422, "is_greedy": true, "logits_per_token": -1.2307965755462646, "logits_per_char": -0.6153982877731323, "bits_per_byte": 0.8878320579424951, "num_chars": 2}, {"sum_logits": -1.40159010887146, "num_tokens": 1, "num_tokens_all": 422, "is_greedy": false, "logits_per_token": -1.40159010887146, "logits_per_char": -0.70079505443573, "bits_per_byte": 1.0110335497146652, "num_chars": 2}, {"sum_logits": -1.8434484004974365, "num_tokens": 1, "num_tokens_all": 422, "is_greedy": false, "logits_per_token": -1.8434484004974365, "logits_per_char": -0.9217242002487183, "bits_per_byte": 1.329766932767127, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 564, "native_id": "ACTAAP_2010_5_7", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.653390884399414, "logits_per_token_corr": -1.653390884399414, "logits_per_char_corr": -0.826695442199707, "bits_per_byte_corr": 1.192669414787883}, "model_output": [{"sum_logits": -1.5331459045410156, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.5331459045410156, "logits_per_char": -0.7665729522705078, "bits_per_byte": 1.1059309967210682, "num_chars": 2}, {"sum_logits": -1.653390884399414, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.653390884399414, "logits_per_char": -0.826695442199707, "bits_per_byte": 1.192669414787883, "num_chars": 2}, {"sum_logits": -1.457712173461914, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.457712173461914, "logits_per_char": -0.728856086730957, "bits_per_byte": 1.0515170618492433, "num_chars": 2}, {"sum_logits": -1.5601940155029297, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.5601940155029297, "logits_per_char": -0.7800970077514648, "bits_per_byte": 1.1254420844961661, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 565, "native_id": "ACTAAP_2012_7_9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5881078243255615, "logits_per_token_corr": -1.5881078243255615, "logits_per_char_corr": -0.7940539121627808, "bits_per_byte_corr": 1.1455776412765473}, "model_output": [{"sum_logits": -0.9617593288421631, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -0.9617593288421631, "logits_per_char": -0.48087966442108154, "bits_per_byte": 0.6937627071251417, "num_chars": 2}, {"sum_logits": -1.217649221420288, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.217649221420288, "logits_per_char": -0.608824610710144, "bits_per_byte": 0.8783482466433096, "num_chars": 2}, {"sum_logits": -1.5881078243255615, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.5881078243255615, "logits_per_char": -0.7940539121627808, "bits_per_byte": 1.1455776412765473, "num_chars": 2}, {"sum_logits": -2.27298903465271, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -2.27298903465271, "logits_per_char": -1.136494517326355, "bits_per_byte": 1.6396150041454063, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 566, "native_id": "MCAS_2005_8_6", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.440385103225708, "logits_per_token_corr": -1.440385103225708, "logits_per_char_corr": -0.720192551612854, "bits_per_byte_corr": 1.0390182226977798}, "model_output": [{"sum_logits": -1.440385103225708, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.440385103225708, "logits_per_char": -0.720192551612854, "bits_per_byte": 1.0390182226977798, "num_chars": 2}, {"sum_logits": -1.055152177810669, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": true, "logits_per_token": -1.055152177810669, "logits_per_char": -0.5275760889053345, "bits_per_byte": 0.7611314071558677, "num_chars": 2}, {"sum_logits": -1.3465015888214111, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.3465015888214111, "logits_per_char": -0.6732507944107056, "bits_per_byte": 0.9712955823715778, "num_chars": 2}, {"sum_logits": -1.9691708087921143, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.9691708087921143, "logits_per_char": -0.9845854043960571, "bits_per_byte": 1.4204564802548667, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 567, "native_id": "Mercury_SC_401162", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.8474216461181641, "logits_per_token_corr": -0.8474216461181641, "logits_per_char_corr": -0.42371082305908203, "bits_per_byte_corr": 0.6112855031987579}, "model_output": [{"sum_logits": -1.6013240814208984, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.6013240814208984, "logits_per_char": -0.8006620407104492, "bits_per_byte": 1.1551111555618323, "num_chars": 2}, {"sum_logits": -1.38037109375, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.38037109375, "logits_per_char": -0.690185546875, "bits_per_byte": 0.9957272657705151, "num_chars": 2}, {"sum_logits": -0.8474216461181641, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -0.8474216461181641, "logits_per_char": -0.42371082305908203, "bits_per_byte": 0.6112855031987579, "num_chars": 2}, {"sum_logits": -2.3523025512695312, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -2.3523025512695312, "logits_per_char": -1.1761512756347656, "bits_per_byte": 1.6968276126947237, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 568, "native_id": "Mercury_SC_407710", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9565571546554565, "logits_per_token_corr": -1.9565571546554565, "logits_per_char_corr": -0.9782785773277283, "bits_per_byte_corr": 1.4113576521196378}, "model_output": [{"sum_logits": -1.0167359113693237, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.0167359113693237, "logits_per_char": -0.5083679556846619, "bits_per_byte": 0.7334199286136489, "num_chars": 2}, {"sum_logits": -1.4384926557540894, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4384926557540894, "logits_per_char": -0.7192463278770447, "bits_per_byte": 1.0376531104065552, "num_chars": 2}, {"sum_logits": -1.4499043226242065, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4499043226242065, "logits_per_char": -0.7249521613121033, "bits_per_byte": 1.0458848880074587, "num_chars": 2}, {"sum_logits": -1.9565571546554565, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.9565571546554565, "logits_per_char": -0.9782785773277283, "bits_per_byte": 1.4113576521196378, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 569, "native_id": "VASoL_2009_3_23", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.936079740524292, "logits_per_token_corr": -1.936079740524292, "logits_per_char_corr": -0.968039870262146, "bits_per_byte_corr": 1.396586320210997}, "model_output": [{"sum_logits": -1.2881629467010498, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.2881629467010498, "logits_per_char": -0.6440814733505249, "bits_per_byte": 0.9292131475319269, "num_chars": 2}, {"sum_logits": -1.157597303390503, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.157597303390503, "logits_per_char": -0.5787986516952515, "bits_per_byte": 0.8350299444745576, "num_chars": 2}, {"sum_logits": -1.394331693649292, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.394331693649292, "logits_per_char": -0.697165846824646, "bits_per_byte": 1.005797709891794, "num_chars": 2}, {"sum_logits": -1.936079740524292, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.936079740524292, "logits_per_char": -0.968039870262146, "bits_per_byte": 1.396586320210997, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 570, "native_id": "Mercury_SC_402276", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7183425426483154, "logits_per_token_corr": -1.7183425426483154, "logits_per_char_corr": -0.8591712713241577, "bits_per_byte_corr": 1.239522132414519}, "model_output": [{"sum_logits": -1.5993945598602295, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.5993945598602295, "logits_per_char": -0.7996972799301147, "bits_per_byte": 1.1537192999683985, "num_chars": 2}, {"sum_logits": -1.2358310222625732, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.2358310222625732, "logits_per_char": -0.6179155111312866, "bits_per_byte": 0.8914636435981167, "num_chars": 2}, {"sum_logits": -1.1462466716766357, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.1462466716766357, "logits_per_char": -0.5731233358383179, "bits_per_byte": 0.8268421944322751, "num_chars": 2}, {"sum_logits": -1.7183425426483154, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.7183425426483154, "logits_per_char": -0.8591712713241577, "bits_per_byte": 1.239522132414519, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 571, "native_id": "Mercury_400744", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9491004943847656, "logits_per_token_corr": -1.9491004943847656, "logits_per_char_corr": -0.9745502471923828, "bits_per_byte_corr": 1.4059788087225742}, "model_output": [{"sum_logits": -1.9491004943847656, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.9491004943847656, "logits_per_char": -0.9745502471923828, "bits_per_byte": 1.4059788087225742, "num_chars": 2}, {"sum_logits": -1.3746013641357422, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3746013641357422, "logits_per_char": -0.6873006820678711, "bits_per_byte": 0.9915652856196321, "num_chars": 2}, {"sum_logits": -1.0959758758544922, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.0959758758544922, "logits_per_char": -0.5479879379272461, "bits_per_byte": 0.790579480515175, "num_chars": 2}, {"sum_logits": -1.4315338134765625, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4315338134765625, "logits_per_char": -0.7157669067382812, "bits_per_byte": 1.0326333667844934, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 572, "native_id": "Mercury_SC_LBS10902", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2537671327590942, "logits_per_token_corr": -1.2537671327590942, "logits_per_char_corr": -0.6268835663795471, "bits_per_byte_corr": 0.9044018124312097}, "model_output": [{"sum_logits": -1.4320565462112427, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4320565462112427, "logits_per_char": -0.7160282731056213, "bits_per_byte": 1.0330104387465104, "num_chars": 2}, {"sum_logits": -1.2537671327590942, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.2537671327590942, "logits_per_char": -0.6268835663795471, "bits_per_byte": 0.9044018124312097, "num_chars": 2}, {"sum_logits": -1.3249512910842896, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3249512910842896, "logits_per_char": -0.6624756455421448, "bits_per_byte": 0.9557503285340536, "num_chars": 2}, {"sum_logits": -1.617519497871399, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.617519497871399, "logits_per_char": -0.8087597489356995, "bits_per_byte": 1.166793679060975, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 573, "native_id": "Mercury_7133245", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4162137508392334, "logits_per_token_corr": -1.4162137508392334, "logits_per_char_corr": -0.7081068754196167, "bits_per_byte_corr": 1.021582277587994}, "model_output": [{"sum_logits": -1.4059293270111084, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.4059293270111084, "logits_per_char": -0.7029646635055542, "bits_per_byte": 1.0141636339603706, "num_chars": 2}, {"sum_logits": -1.4162137508392334, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.4162137508392334, "logits_per_char": -0.7081068754196167, "bits_per_byte": 1.021582277587994, "num_chars": 2}, {"sum_logits": -1.1912782192230225, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.1912782192230225, "logits_per_char": -0.5956391096115112, "bits_per_byte": 0.8593255895966623, "num_chars": 2}, {"sum_logits": -1.7398946285247803, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.7398946285247803, "logits_per_char": -0.8699473142623901, "bits_per_byte": 1.2550686761219245, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 574, "native_id": "Mercury_7131530", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1684303283691406, "logits_per_token_corr": -1.1684303283691406, "logits_per_char_corr": -0.5842151641845703, "bits_per_byte_corr": 0.8428443201818167}, "model_output": [{"sum_logits": -1.30126953125, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.30126953125, "logits_per_char": -0.650634765625, "bits_per_byte": 0.9386675497978149, "num_chars": 2}, {"sum_logits": -1.1684303283691406, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": true, "logits_per_token": -1.1684303283691406, "logits_per_char": -0.5842151641845703, "bits_per_byte": 0.8428443201818167, "num_chars": 2}, {"sum_logits": -1.4346294403076172, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.4346294403076172, "logits_per_char": -0.7173147201538086, "bits_per_byte": 1.0348663895232977, "num_chars": 2}, {"sum_logits": -1.830301284790039, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.830301284790039, "logits_per_char": -0.9151506423950195, "bits_per_byte": 1.3202832934505924, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 575, "native_id": "Mercury_7041143", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.324042558670044, "logits_per_token_corr": -1.324042558670044, "logits_per_char_corr": -0.662021279335022, "bits_per_byte_corr": 0.9550948166602896}, "model_output": [{"sum_logits": -1.4217674732208252, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4217674732208252, "logits_per_char": -0.7108837366104126, "bits_per_byte": 1.0255884414571952, "num_chars": 2}, {"sum_logits": -1.1953575611114502, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.1953575611114502, "logits_per_char": -0.5976787805557251, "bits_per_byte": 0.8622682127529271, "num_chars": 2}, {"sum_logits": -1.324042558670044, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.324042558670044, "logits_per_char": -0.662021279335022, "bits_per_byte": 0.9550948166602896, "num_chars": 2}, {"sum_logits": -1.860917329788208, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.860917329788208, "logits_per_char": -0.930458664894104, "bits_per_byte": 1.3423681015958542, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 576, "native_id": "MCAS_2010_5_11984", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5688809156417847, "logits_per_token_corr": -1.5688809156417847, "logits_per_char_corr": -0.7844404578208923, "bits_per_byte_corr": 1.1317083583716825}, "model_output": [{"sum_logits": -1.5688809156417847, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.5688809156417847, "logits_per_char": -0.7844404578208923, "bits_per_byte": 1.1317083583716825, "num_chars": 2}, {"sum_logits": -1.1720303297042847, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.1720303297042847, "logits_per_char": -0.5860151648521423, "bits_per_byte": 0.8454411722185216, "num_chars": 2}, {"sum_logits": -1.259959101676941, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.259959101676941, "logits_per_char": -0.6299795508384705, "bits_per_byte": 0.9088683738567709, "num_chars": 2}, {"sum_logits": -1.7007168531417847, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.7007168531417847, "logits_per_char": -0.8503584265708923, "bits_per_byte": 1.2268078849928494, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 577, "native_id": "Mercury_7159285", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3756508827209473, "logits_per_token_corr": -1.3756508827209473, "logits_per_char_corr": -0.6878254413604736, "bits_per_byte_corr": 0.9923223532487307}, "model_output": [{"sum_logits": -1.572990894317627, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.572990894317627, "logits_per_char": -0.7864954471588135, "bits_per_byte": 1.1346730812985832, "num_chars": 2}, {"sum_logits": -0.9787869453430176, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": true, "logits_per_token": -0.9787869453430176, "logits_per_char": -0.4893934726715088, "bits_per_byte": 0.7060455360671214, "num_chars": 2}, {"sum_logits": -1.3756508827209473, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.3756508827209473, "logits_per_char": -0.6878254413604736, "bits_per_byte": 0.9923223532487307, "num_chars": 2}, {"sum_logits": -1.8832058906555176, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.8832058906555176, "logits_per_char": -0.9416029453277588, "bits_per_byte": 1.3584458997117752, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 578, "native_id": "AIMS_2008_8_13", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6387377977371216, "logits_per_token_corr": -1.6387377977371216, "logits_per_char_corr": -0.8193688988685608, "bits_per_byte_corr": 1.1820994470571724}, "model_output": [{"sum_logits": -1.595265507698059, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.595265507698059, "logits_per_char": -0.7976327538490295, "bits_per_byte": 1.150740818429429, "num_chars": 2}, {"sum_logits": -1.0033999681472778, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.0033999681472778, "logits_per_char": -0.5016999840736389, "bits_per_byte": 0.7238000790376308, "num_chars": 2}, {"sum_logits": -1.4876338243484497, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4876338243484497, "logits_per_char": -0.7438169121742249, "bits_per_byte": 1.0731009705238668, "num_chars": 2}, {"sum_logits": -1.6387377977371216, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.6387377977371216, "logits_per_char": -0.8193688988685608, "bits_per_byte": 1.1820994470571724, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 579, "native_id": "MDSA_2013_8_20", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4587604999542236, "logits_per_token_corr": -1.4587604999542236, "logits_per_char_corr": -0.7293802499771118, "bits_per_byte_corr": 1.0522732695650876}, "model_output": [{"sum_logits": -1.1504051685333252, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.1504051685333252, "logits_per_char": -0.5752025842666626, "bits_per_byte": 0.8298419158286264, "num_chars": 2}, {"sum_logits": -1.3429596424102783, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3429596424102783, "logits_per_char": -0.6714798212051392, "bits_per_byte": 0.968740608110358, "num_chars": 2}, {"sum_logits": -1.4587604999542236, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4587604999542236, "logits_per_char": -0.7293802499771118, "bits_per_byte": 1.0522732695650876, "num_chars": 2}, {"sum_logits": -1.7496368885040283, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.7496368885040283, "logits_per_char": -0.8748184442520142, "bits_per_byte": 1.2620962312014856, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 580, "native_id": "Mercury_7114100", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2785582542419434, "logits_per_token_corr": -1.2785582542419434, "logits_per_char_corr": -0.6392791271209717, "bits_per_byte_corr": 0.9222848264419137}, "model_output": [{"sum_logits": -1.2785582542419434, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.2785582542419434, "logits_per_char": -0.6392791271209717, "bits_per_byte": 0.9222848264419137, "num_chars": 2}, {"sum_logits": -1.1329989433288574, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.1329989433288574, "logits_per_char": -0.5664994716644287, "bits_per_byte": 0.8172859784370763, "num_chars": 2}, {"sum_logits": -1.4099249839782715, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4099249839782715, "logits_per_char": -0.7049624919891357, "bits_per_byte": 1.0170458912061824, "num_chars": 2}, {"sum_logits": -1.973517894744873, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.973517894744873, "logits_per_char": -0.9867589473724365, "bits_per_byte": 1.4235922399280505, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 581, "native_id": "Mercury_7213343", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4319660663604736, "logits_per_token_corr": -1.4319660663604736, "logits_per_char_corr": -0.7159830331802368, "bits_per_byte_corr": 1.032945171330508}, "model_output": [{"sum_logits": -1.4319660663604736, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4319660663604736, "logits_per_char": -0.7159830331802368, "bits_per_byte": 1.032945171330508, "num_chars": 2}, {"sum_logits": -0.9868729114532471, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -0.9868729114532471, "logits_per_char": -0.49343645572662354, "bits_per_byte": 0.7118783276711378, "num_chars": 2}, {"sum_logits": -1.4044601917266846, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4044601917266846, "logits_per_char": -0.7022300958633423, "bits_per_byte": 1.0131038768657532, "num_chars": 2}, {"sum_logits": -2.028857469558716, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -2.028857469558716, "logits_per_char": -1.014428734779358, "bits_per_byte": 1.4635113050024966, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 582, "native_id": "Mercury_SC_LBS10597", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5340664386749268, "logits_per_token_corr": -1.5340664386749268, "logits_per_char_corr": -0.7670332193374634, "bits_per_byte_corr": 1.10659502173605}, "model_output": [{"sum_logits": -1.5340664386749268, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.5340664386749268, "logits_per_char": -0.7670332193374634, "bits_per_byte": 1.10659502173605, "num_chars": 2}, {"sum_logits": -1.1173012256622314, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.1173012256622314, "logits_per_char": -0.5586506128311157, "bits_per_byte": 0.80596246872161, "num_chars": 2}, {"sum_logits": -1.386775255203247, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.386775255203247, "logits_per_char": -0.6933876276016235, "bits_per_byte": 1.0003468917553442, "num_chars": 2}, {"sum_logits": -1.6369221210479736, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.6369221210479736, "logits_per_char": -0.8184610605239868, "bits_per_byte": 1.1807897131795257, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 583, "native_id": "Mercury_7126263", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4242643117904663, "logits_per_token_corr": -1.4242643117904663, "logits_per_char_corr": -0.7121321558952332, "bits_per_byte_corr": 1.027389529768357}, "model_output": [{"sum_logits": -1.4219964742660522, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4219964742660522, "logits_per_char": -0.7109982371330261, "bits_per_byte": 1.025753630793349, "num_chars": 2}, {"sum_logits": -1.4242643117904663, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4242643117904663, "logits_per_char": -0.7121321558952332, "bits_per_byte": 1.027389529768357, "num_chars": 2}, {"sum_logits": -1.3662618398666382, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.3662618398666382, "logits_per_char": -0.6831309199333191, "bits_per_byte": 0.985549590466423, "num_chars": 2}, {"sum_logits": -1.4223130941390991, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4223130941390991, "logits_per_char": -0.7111565470695496, "bits_per_byte": 1.025982023753695, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 584, "native_id": "Mercury_7133613", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.485467553138733, "logits_per_token_corr": -1.485467553138733, "logits_per_char_corr": -0.7427337765693665, "bits_per_byte_corr": 1.071538336158126}, "model_output": [{"sum_logits": -1.485467553138733, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.485467553138733, "logits_per_char": -0.7427337765693665, "bits_per_byte": 1.071538336158126, "num_chars": 2}, {"sum_logits": -1.153812050819397, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": true, "logits_per_token": -1.153812050819397, "logits_per_char": -0.5769060254096985, "bits_per_byte": 0.8322994619181323, "num_chars": 2}, {"sum_logits": -1.3543392419815063, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.3543392419815063, "logits_per_char": -0.6771696209907532, "bits_per_byte": 0.9769492540447203, "num_chars": 2}, {"sum_logits": -1.741143822669983, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.741143822669983, "logits_per_char": -0.8705719113349915, "bits_per_byte": 1.2559697792211209, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 585, "native_id": "Mercury_7234605", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9853962659835815, "logits_per_token_corr": -1.9853962659835815, "logits_per_char_corr": -0.9926981329917908, "bits_per_byte_corr": 1.4321606735680181}, "model_output": [{"sum_logits": -1.2593010663986206, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.2593010663986206, "logits_per_char": -0.6296505331993103, "bits_per_byte": 0.9083937017403892, "num_chars": 2}, {"sum_logits": -1.2184704542160034, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.2184704542160034, "logits_per_char": -0.6092352271080017, "bits_per_byte": 0.8789406408842069, "num_chars": 2}, {"sum_logits": -1.298622965812683, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.298622965812683, "logits_per_char": -0.6493114829063416, "bits_per_byte": 0.9367584563819109, "num_chars": 2}, {"sum_logits": -1.9853962659835815, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.9853962659835815, "logits_per_char": -0.9926981329917908, "bits_per_byte": 1.4321606735680181, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 586, "native_id": "Mercury_SC_400839", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.095102071762085, "logits_per_token_corr": -1.095102071762085, "logits_per_char_corr": -0.5475510358810425, "bits_per_byte_corr": 0.7899491640997623}, "model_output": [{"sum_logits": -1.2823121547698975, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.2823121547698975, "logits_per_char": -0.6411560773849487, "bits_per_byte": 0.9249926932797505, "num_chars": 2}, {"sum_logits": -1.095102071762085, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.095102071762085, "logits_per_char": -0.5475510358810425, "bits_per_byte": 0.7899491640997623, "num_chars": 2}, {"sum_logits": -1.4384381771087646, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4384381771087646, "logits_per_char": -0.7192190885543823, "bits_per_byte": 1.0376138123708332, "num_chars": 2}, {"sum_logits": -2.003038167953491, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -2.003038167953491, "logits_per_char": -1.0015190839767456, "bits_per_byte": 1.4448866158099463, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 587, "native_id": "Mercury_SC_402984", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.168929934501648, "logits_per_token_corr": -1.168929934501648, "logits_per_char_corr": -0.584464967250824, "bits_per_byte_corr": 0.8432047098266999}, "model_output": [{"sum_logits": -1.1701353788375854, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.1701353788375854, "logits_per_char": -0.5850676894187927, "bits_per_byte": 0.8440742541094629, "num_chars": 2}, {"sum_logits": -1.168929934501648, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.168929934501648, "logits_per_char": -0.584464967250824, "bits_per_byte": 0.8432047098266999, "num_chars": 2}, {"sum_logits": -1.5630663633346558, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5630663633346558, "logits_per_char": -0.7815331816673279, "bits_per_byte": 1.1275140454824373, "num_chars": 2}, {"sum_logits": -1.8588465452194214, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.8588465452194214, "logits_per_char": -0.9294232726097107, "bits_per_byte": 1.340874346281784, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 588, "native_id": "NYSEDREGENTS_2012_4_29", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5174481868743896, "logits_per_token_corr": -1.5174481868743896, "logits_per_char_corr": -0.7587240934371948, "bits_per_byte_corr": 1.094607487005602}, "model_output": [{"sum_logits": -0.985492467880249, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": true, "logits_per_token": -0.985492467880249, "logits_per_char": -0.4927462339401245, "bits_per_byte": 0.7108825481226414, "num_chars": 2}, {"sum_logits": -1.4023091793060303, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.4023091793060303, "logits_per_char": -0.7011545896530151, "bits_per_byte": 1.0115522493896678, "num_chars": 2}, {"sum_logits": -1.5174481868743896, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.5174481868743896, "logits_per_char": -0.7587240934371948, "bits_per_byte": 1.094607487005602, "num_chars": 2}, {"sum_logits": -2.0150563716888428, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -2.0150563716888428, "logits_per_char": -1.0075281858444214, "bits_per_byte": 1.4535559172746448, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 589, "native_id": "VASoL_2009_3_22", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9966583251953125, "logits_per_token_corr": -1.9966583251953125, "logits_per_char_corr": -0.9983291625976562, "bits_per_byte_corr": 1.440284532055505}, "model_output": [{"sum_logits": -1.3219928741455078, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3219928741455078, "logits_per_char": -0.6609964370727539, "bits_per_byte": 0.9536162818108209, "num_chars": 2}, {"sum_logits": -1.1124134063720703, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.1124134063720703, "logits_per_char": -0.5562067031860352, "bits_per_byte": 0.802436652396269, "num_chars": 2}, {"sum_logits": -1.3748226165771484, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3748226165771484, "logits_per_char": -0.6874113082885742, "bits_per_byte": 0.9917248855196329, "num_chars": 2}, {"sum_logits": -1.9966583251953125, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.9966583251953125, "logits_per_char": -0.9983291625976562, "bits_per_byte": 1.440284532055505, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 590, "native_id": "Mercury_409349", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3556967973709106, "logits_per_token_corr": -1.3556967973709106, "logits_per_char_corr": -0.6778483986854553, "bits_per_byte_corr": 0.9779285232587339}, "model_output": [{"sum_logits": -1.3556967973709106, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.3556967973709106, "logits_per_char": -0.6778483986854553, "bits_per_byte": 0.9779285232587339, "num_chars": 2}, {"sum_logits": -1.294852375984192, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": true, "logits_per_token": -1.294852375984192, "logits_per_char": -0.647426187992096, "bits_per_byte": 0.9340385507585136, "num_chars": 2}, {"sum_logits": -1.49280846118927, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.49280846118927, "logits_per_char": -0.746404230594635, "bits_per_byte": 1.0768336819781958, "num_chars": 2}, {"sum_logits": -1.5154677629470825, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.5154677629470825, "logits_per_char": -0.7577338814735413, "bits_per_byte": 1.0931789131162089, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 591, "native_id": "Mercury_SC_407417", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7313265800476074, "logits_per_token_corr": -1.7313265800476074, "logits_per_char_corr": -0.8656632900238037, "bits_per_byte_corr": 1.2488881355978634}, "model_output": [{"sum_logits": -1.4106993675231934, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4106993675231934, "logits_per_char": -0.7053496837615967, "bits_per_byte": 1.0176044908561852, "num_chars": 2}, {"sum_logits": -1.2429022789001465, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.2429022789001465, "logits_per_char": -0.6214511394500732, "bits_per_byte": 0.8965644770400605, "num_chars": 2}, {"sum_logits": -1.2932372093200684, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.2932372093200684, "logits_per_char": -0.6466186046600342, "bits_per_byte": 0.9328734542902427, "num_chars": 2}, {"sum_logits": -1.7313265800476074, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.7313265800476074, "logits_per_char": -0.8656632900238037, "bits_per_byte": 1.2488881355978634, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 592, "native_id": "VASoL_2007_5_21", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2087104320526123, "logits_per_token_corr": -1.2087104320526123, "logits_per_char_corr": -0.6043552160263062, "bits_per_byte_corr": 0.8719002730971565}, "model_output": [{"sum_logits": -1.6278598308563232, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.6278598308563232, "logits_per_char": -0.8139299154281616, "bits_per_byte": 1.1742526526202257, "num_chars": 2}, {"sum_logits": -1.254805326461792, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.254805326461792, "logits_per_char": -0.627402663230896, "bits_per_byte": 0.9051507108843924, "num_chars": 2}, {"sum_logits": -1.2087104320526123, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.2087104320526123, "logits_per_char": -0.6043552160263062, "bits_per_byte": 0.8719002730971565, "num_chars": 2}, {"sum_logits": -1.6109111309051514, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.6109111309051514, "logits_per_char": -0.8054555654525757, "bits_per_byte": 1.1620267499356816, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 593, "native_id": "MCAS_2012_8_23651", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.499729037284851, "logits_per_token_corr": -1.499729037284851, "logits_per_char_corr": -0.7498645186424255, "bits_per_byte_corr": 1.0818258223847943}, "model_output": [{"sum_logits": -1.499729037284851, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.499729037284851, "logits_per_char": -0.7498645186424255, "bits_per_byte": 1.0818258223847943, "num_chars": 2}, {"sum_logits": -1.3541144132614136, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3541144132614136, "logits_per_char": -0.6770572066307068, "bits_per_byte": 0.9767870744049566, "num_chars": 2}, {"sum_logits": -1.4645270109176636, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4645270109176636, "logits_per_char": -0.7322635054588318, "bits_per_byte": 1.056432927950184, "num_chars": 2}, {"sum_logits": -1.308120608329773, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.308120608329773, "logits_per_char": -0.6540603041648865, "bits_per_byte": 0.9436095572616866, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 594, "native_id": "MCAS_2000_4_26", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.080840587615967, "logits_per_token_corr": -2.080840587615967, "logits_per_char_corr": -1.0404202938079834, "bits_per_byte_corr": 1.5010091983180942}, "model_output": [{"sum_logits": -1.3148473501205444, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3148473501205444, "logits_per_char": -0.6574236750602722, "bits_per_byte": 0.9484618757731335, "num_chars": 2}, {"sum_logits": -1.0782426595687866, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.0782426595687866, "logits_per_char": -0.5391213297843933, "bits_per_byte": 0.7777876689179665, "num_chars": 2}, {"sum_logits": -1.394654631614685, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.394654631614685, "logits_per_char": -0.6973273158073425, "bits_per_byte": 1.0060306603923879, "num_chars": 2}, {"sum_logits": -2.080840587615967, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -2.080840587615967, "logits_per_char": -1.0404202938079834, "bits_per_byte": 1.5010091983180942, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 595, "native_id": "Mercury_SC_410971", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3538193702697754, "logits_per_token_corr": -1.3538193702697754, "logits_per_char_corr": -0.6769096851348877, "bits_per_byte_corr": 0.9765742458745138}, "model_output": [{"sum_logits": -1.3538193702697754, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3538193702697754, "logits_per_char": -0.6769096851348877, "bits_per_byte": 0.9765742458745138, "num_chars": 2}, {"sum_logits": -1.0587525367736816, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.0587525367736816, "logits_per_char": -0.5293762683868408, "bits_per_byte": 0.7637285171665489, "num_chars": 2}, {"sum_logits": -1.4215703010559082, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4215703010559082, "logits_per_char": -0.7107851505279541, "bits_per_byte": 1.0254462118049315, "num_chars": 2}, {"sum_logits": -1.9242253303527832, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.9242253303527832, "logits_per_char": -0.9621126651763916, "bits_per_byte": 1.388035170827441, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 596, "native_id": "Mercury_404841", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5944337844848633, "logits_per_token_corr": -1.5944337844848633, "logits_per_char_corr": -0.7972168922424316, "bits_per_byte_corr": 1.1501408569518936}, "model_output": [{"sum_logits": -1.401200294494629, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.401200294494629, "logits_per_char": -0.7006001472473145, "bits_per_byte": 1.0107523580805042, "num_chars": 2}, {"sum_logits": -1.4799623489379883, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4799623489379883, "logits_per_char": -0.7399811744689941, "bits_per_byte": 1.0675671707583756, "num_chars": 2}, {"sum_logits": -1.1984834671020508, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.1984834671020508, "logits_per_char": -0.5992417335510254, "bits_per_byte": 0.864523077288391, "num_chars": 2}, {"sum_logits": -1.5944337844848633, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5944337844848633, "logits_per_char": -0.7972168922424316, "bits_per_byte": 1.1501408569518936, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 597, "native_id": "Mercury_416651", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3506231307983398, "logits_per_token_corr": -1.3506231307983398, "logits_per_char_corr": -0.6753115653991699, "bits_per_byte_corr": 0.9742686464570453}, "model_output": [{"sum_logits": -1.3506231307983398, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.3506231307983398, "logits_per_char": -0.6753115653991699, "bits_per_byte": 0.9742686464570453, "num_chars": 2}, {"sum_logits": -1.2770109176635742, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.2770109176635742, "logits_per_char": -0.6385054588317871, "bits_per_byte": 0.9211686590378132, "num_chars": 2}, {"sum_logits": -1.4232568740844727, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.4232568740844727, "logits_per_char": -0.7116284370422363, "bits_per_byte": 1.026662817077136, "num_chars": 2}, {"sum_logits": -1.6630773544311523, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.6630773544311523, "logits_per_char": -0.8315386772155762, "bits_per_byte": 1.199656725927142, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 598, "native_id": "Mercury_416576", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0452170372009277, "logits_per_token_corr": -1.0452170372009277, "logits_per_char_corr": -0.5226085186004639, "bits_per_byte_corr": 0.7539647181117585}, "model_output": [{"sum_logits": -1.260728359222412, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -1.260728359222412, "logits_per_char": -0.630364179611206, "bits_per_byte": 0.9094232758797801, "num_chars": 2}, {"sum_logits": -1.0452170372009277, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": true, "logits_per_token": -1.0452170372009277, "logits_per_char": -0.5226085186004639, "bits_per_byte": 0.7539647181117585, "num_chars": 2}, {"sum_logits": -1.4466891288757324, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -1.4466891288757324, "logits_per_char": -0.7233445644378662, "bits_per_byte": 1.0435656159692466, "num_chars": 2}, {"sum_logits": -2.107083797454834, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -2.107083797454834, "logits_per_char": -1.053541898727417, "bits_per_byte": 1.519939672663879, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 599, "native_id": "MCAS_1998_8_24", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.079163074493408, "logits_per_token_corr": -2.079163074493408, "logits_per_char_corr": -1.039581537246704, "bits_per_byte_corr": 1.4997991283866228}, "model_output": [{"sum_logits": -1.3059543371200562, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.3059543371200562, "logits_per_char": -0.6529771685600281, "bits_per_byte": 0.942046922895946, "num_chars": 2}, {"sum_logits": -0.9602874517440796, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": true, "logits_per_token": -0.9602874517440796, "logits_per_char": -0.4801437258720398, "bits_per_byte": 0.6927009722300393, "num_chars": 2}, {"sum_logits": -1.5895599126815796, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.5895599126815796, "logits_per_char": -0.7947799563407898, "bits_per_byte": 1.146625101611628, "num_chars": 2}, {"sum_logits": -2.079163074493408, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -2.079163074493408, "logits_per_char": -1.039581537246704, "bits_per_byte": 1.4997991283866228, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 600, "native_id": "Mercury_SC_408367", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1772022247314453, "logits_per_token_corr": -1.1772022247314453, "logits_per_char_corr": -0.5886011123657227, "bits_per_byte_corr": 0.8491719058723657}, "model_output": [{"sum_logits": -1.1674003601074219, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.1674003601074219, "logits_per_char": -0.5837001800537109, "bits_per_byte": 0.8421013551300888, "num_chars": 2}, {"sum_logits": -1.1772022247314453, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.1772022247314453, "logits_per_char": -0.5886011123657227, "bits_per_byte": 0.8491719058723657, "num_chars": 2}, {"sum_logits": -1.3795967102050781, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3795967102050781, "logits_per_char": -0.6897983551025391, "bits_per_byte": 0.9951686661205122, "num_chars": 2}, {"sum_logits": -2.1684532165527344, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -2.1684532165527344, "logits_per_char": -1.0842266082763672, "bits_per_byte": 1.5642083509612994, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 601, "native_id": "Mercury_405804", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6186072826385498, "logits_per_token_corr": -1.6186072826385498, "logits_per_char_corr": -0.8093036413192749, "bits_per_byte_corr": 1.1675783499055372}, "model_output": [{"sum_logits": -1.2811439037322998, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": true, "logits_per_token": -1.2811439037322998, "logits_per_char": -0.6405719518661499, "bits_per_byte": 0.9241499782905221, "num_chars": 2}, {"sum_logits": -1.3521335124969482, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.3521335124969482, "logits_per_char": -0.6760667562484741, "bits_per_byte": 0.9753581565502619, "num_chars": 2}, {"sum_logits": -1.3937346935272217, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.3937346935272217, "logits_per_char": -0.6968673467636108, "bits_per_byte": 1.0053670653340332, "num_chars": 2}, {"sum_logits": -1.6186072826385498, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.6186072826385498, "logits_per_char": -0.8093036413192749, "bits_per_byte": 1.1675783499055372, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 602, "native_id": "Mercury_7216318", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4677886962890625, "logits_per_token_corr": -1.4677886962890625, "logits_per_char_corr": -0.7338943481445312, "bits_per_byte_corr": 1.0587857366053144}, "model_output": [{"sum_logits": -1.4677886962890625, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4677886962890625, "logits_per_char": -0.7338943481445312, "bits_per_byte": 1.0587857366053144, "num_chars": 2}, {"sum_logits": -1.0718250274658203, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.0718250274658203, "logits_per_char": -0.5359125137329102, "bits_per_byte": 0.7731583259133634, "num_chars": 2}, {"sum_logits": -1.3680038452148438, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.3680038452148438, "logits_per_char": -0.6840019226074219, "bits_per_byte": 0.986806181704953, "num_chars": 2}, {"sum_logits": -1.8363609313964844, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.8363609313964844, "logits_per_char": -0.9181804656982422, "bits_per_byte": 1.3246544045049247, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 603, "native_id": "Mercury_401312", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.0683274269104004, "logits_per_token_corr": -2.0683274269104004, "logits_per_char_corr": -1.0341637134552002, "bits_per_byte_corr": 1.4919828608702042}, "model_output": [{"sum_logits": -1.3833907842636108, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3833907842636108, "logits_per_char": -0.6916953921318054, "bits_per_byte": 0.9979055120350195, "num_chars": 2}, {"sum_logits": -1.0724910497665405, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.0724910497665405, "logits_per_char": -0.5362455248832703, "bits_per_byte": 0.7736387594485491, "num_chars": 2}, {"sum_logits": -1.3208755254745483, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3208755254745483, "logits_per_char": -0.6604377627372742, "bits_per_byte": 0.9528102851175518, "num_chars": 2}, {"sum_logits": -2.0683274269104004, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -2.0683274269104004, "logits_per_char": -1.0341637134552002, "bits_per_byte": 1.4919828608702042, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 604, "native_id": "MDSA_2013_8_23", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.141450881958008, "logits_per_token_corr": -2.141450881958008, "logits_per_char_corr": -1.070725440979004, "bits_per_byte_corr": 1.5447302838551673}, "model_output": [{"sum_logits": -1.3010729551315308, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.3010729551315308, "logits_per_char": -0.6505364775657654, "bits_per_byte": 0.9385257501021784, "num_chars": 2}, {"sum_logits": -1.0493773221969604, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.0493773221969604, "logits_per_char": -0.5246886610984802, "bits_per_byte": 0.7569657293779912, "num_chars": 2}, {"sum_logits": -1.3918436765670776, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.3918436765670776, "logits_per_char": -0.6959218382835388, "bits_per_byte": 1.004002984938714, "num_chars": 2}, {"sum_logits": -2.141450881958008, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -2.141450881958008, "logits_per_char": -1.070725440979004, "bits_per_byte": 1.5447302838551673, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 605, "native_id": "Mercury_SC_405880", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4023655652999878, "logits_per_token_corr": -1.4023655652999878, "logits_per_char_corr": -0.7011827826499939, "bits_per_byte_corr": 1.0115929232865968}, "model_output": [{"sum_logits": -1.4023655652999878, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4023655652999878, "logits_per_char": -0.7011827826499939, "bits_per_byte": 1.0115929232865968, "num_chars": 2}, {"sum_logits": -1.1656941175460815, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.1656941175460815, "logits_per_char": -0.5828470587730408, "bits_per_byte": 0.8408705612891882, "num_chars": 2}, {"sum_logits": -1.288695216178894, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.288695216178894, "logits_per_char": -0.644347608089447, "bits_per_byte": 0.9295970987999784, "num_chars": 2}, {"sum_logits": -1.854246973991394, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.854246973991394, "logits_per_char": -0.927123486995697, "bits_per_byte": 1.3375564569813363, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 606, "native_id": "ACTAAP_2009_5_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2439796924591064, "logits_per_token_corr": -1.2439796924591064, "logits_per_char_corr": -0.6219898462295532, "bits_per_byte_corr": 0.89734166663931}, "model_output": [{"sum_logits": -1.5603210926055908, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.5603210926055908, "logits_per_char": -0.7801605463027954, "bits_per_byte": 1.1255337512490762, "num_chars": 2}, {"sum_logits": -1.2439796924591064, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.2439796924591064, "logits_per_char": -0.6219898462295532, "bits_per_byte": 0.89734166663931, "num_chars": 2}, {"sum_logits": -1.5551235675811768, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.5551235675811768, "logits_per_char": -0.7775617837905884, "bits_per_byte": 1.1217845294602642, "num_chars": 2}, {"sum_logits": -1.4059765338897705, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4059765338897705, "logits_per_char": -0.7029882669448853, "bits_per_byte": 1.0141976865252413, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 607, "native_id": "CSZ20754", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5205633640289307, "logits_per_token_corr": -1.5205633640289307, "logits_per_char_corr": -0.7602816820144653, "bits_per_byte_corr": 1.096854612321777}, "model_output": [{"sum_logits": -1.307781457901001, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.307781457901001, "logits_per_char": -0.6538907289505005, "bits_per_byte": 0.9433649119408342, "num_chars": 2}, {"sum_logits": -1.5205633640289307, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.5205633640289307, "logits_per_char": -0.7602816820144653, "bits_per_byte": 1.096854612321777, "num_chars": 2}, {"sum_logits": -1.3695852756500244, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3695852756500244, "logits_per_char": -0.6847926378250122, "bits_per_byte": 0.9879469426281269, "num_chars": 2}, {"sum_logits": -1.4550116062164307, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4550116062164307, "logits_per_char": -0.7275058031082153, "bits_per_byte": 1.0495690143629188, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 608, "native_id": "Mercury_184363", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.864898681640625, "logits_per_token_corr": -1.864898681640625, "logits_per_char_corr": -0.9324493408203125, "bits_per_byte_corr": 1.345240039882614}, "model_output": [{"sum_logits": -1.6446285247802734, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.6446285247802734, "logits_per_char": -0.8223142623901367, "bits_per_byte": 1.1863487084033684, "num_chars": 2}, {"sum_logits": -0.9061222672462463, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -0.9061222672462463, "logits_per_char": -0.45306113362312317, "bits_per_byte": 0.6536290506980814, "num_chars": 2}, {"sum_logits": -1.455148696899414, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.455148696899414, "logits_per_char": -0.727574348449707, "bits_per_byte": 1.049667904387165, "num_chars": 2}, {"sum_logits": -1.864898681640625, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.864898681640625, "logits_per_char": -0.9324493408203125, "bits_per_byte": 1.345240039882614, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 609, "native_id": "Mercury_7188195", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4537556171417236, "logits_per_token_corr": -1.4537556171417236, "logits_per_char_corr": -0.7268778085708618, "bits_per_byte_corr": 1.048663009758173}, "model_output": [{"sum_logits": -1.174414873123169, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.174414873123169, "logits_per_char": -0.5872074365615845, "bits_per_byte": 0.8471612567011271, "num_chars": 2}, {"sum_logits": -1.2615997791290283, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.2615997791290283, "logits_per_char": -0.6307998895645142, "bits_per_byte": 0.9100518724686841, "num_chars": 2}, {"sum_logits": -1.4537556171417236, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4537556171417236, "logits_per_char": -0.7268778085708618, "bits_per_byte": 1.048663009758173, "num_chars": 2}, {"sum_logits": -1.8558990955352783, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.8558990955352783, "logits_per_char": -0.9279495477676392, "bits_per_byte": 1.338748210760491, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 610, "native_id": "Mercury_7221043", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8959245681762695, "logits_per_token_corr": -1.8959245681762695, "logits_per_char_corr": -0.9479622840881348, "bits_per_byte_corr": 1.3676204862047092}, "model_output": [{"sum_logits": -1.4521722793579102, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4521722793579102, "logits_per_char": -0.7260861396789551, "bits_per_byte": 1.0475208729737924, "num_chars": 2}, {"sum_logits": -0.9651155471801758, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -0.9651155471801758, "logits_per_char": -0.4825577735900879, "bits_per_byte": 0.6961837069013391, "num_chars": 2}, {"sum_logits": -1.4972429275512695, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4972429275512695, "logits_per_char": -0.7486214637756348, "bits_per_byte": 1.080032473292921, "num_chars": 2}, {"sum_logits": -1.8959245681762695, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.8959245681762695, "logits_per_char": -0.9479622840881348, "bits_per_byte": 1.3676204862047092, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 611, "native_id": "Mercury_7107328", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0859699249267578, "logits_per_token_corr": -1.0859699249267578, "logits_per_char_corr": -0.5429849624633789, "bits_per_byte_corr": 0.7833617126237595}, "model_output": [{"sum_logits": -1.295736312866211, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.295736312866211, "logits_per_char": -0.6478681564331055, "bits_per_byte": 0.934676176436588, "num_chars": 2}, {"sum_logits": -1.0859699249267578, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.0859699249267578, "logits_per_char": -0.5429849624633789, "bits_per_byte": 0.7833617126237595, "num_chars": 2}, {"sum_logits": -1.3296833038330078, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3296833038330078, "logits_per_char": -0.6648416519165039, "bits_per_byte": 0.9591637541970557, "num_chars": 2}, {"sum_logits": -2.163637161254883, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -2.163637161254883, "logits_per_char": -1.0818185806274414, "bits_per_byte": 1.5607343014138682, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 612, "native_id": "Mercury_415084", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3609873056411743, "logits_per_token_corr": -1.3609873056411743, "logits_per_char_corr": -0.6804936528205872, "bits_per_byte_corr": 0.9817448182813824}, "model_output": [{"sum_logits": -1.684221863746643, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.684221863746643, "logits_per_char": -0.8421109318733215, "bits_per_byte": 1.2149092652928974, "num_chars": 2}, {"sum_logits": -1.7193418741226196, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.7193418741226196, "logits_per_char": -0.8596709370613098, "bits_per_byte": 1.2402429976956109, "num_chars": 2}, {"sum_logits": -1.3609873056411743, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3609873056411743, "logits_per_char": -0.6804936528205872, "bits_per_byte": 0.9817448182813824, "num_chars": 2}, {"sum_logits": -1.0338808298110962, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.0338808298110962, "logits_per_char": -0.5169404149055481, "bits_per_byte": 0.7457873730198532, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 613, "native_id": "Mercury_415082", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7628133296966553, "logits_per_token_corr": -1.7628133296966553, "logits_per_char_corr": -0.8814066648483276, "bits_per_byte_corr": 1.2716010243840765}, "model_output": [{"sum_logits": -1.7628133296966553, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.7628133296966553, "logits_per_char": -0.8814066648483276, "bits_per_byte": 1.2716010243840765, "num_chars": 2}, {"sum_logits": -1.4774396419525146, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4774396419525146, "logits_per_char": -0.7387198209762573, "bits_per_byte": 1.0657474223295949, "num_chars": 2}, {"sum_logits": -1.2142407894134521, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.2142407894134521, "logits_per_char": -0.6071203947067261, "bits_per_byte": 0.875889582666573, "num_chars": 2}, {"sum_logits": -1.2675225734710693, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.2675225734710693, "logits_per_char": -0.6337612867355347, "bits_per_byte": 0.9143242654814211, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 614, "native_id": "Mercury_SC_416169", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1418949365615845, "logits_per_token_corr": -1.1418949365615845, "logits_per_char_corr": -0.5709474682807922, "bits_per_byte_corr": 0.8237030810973994}, "model_output": [{"sum_logits": -1.1418949365615845, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.1418949365615845, "logits_per_char": -0.5709474682807922, "bits_per_byte": 0.8237030810973994, "num_chars": 2}, {"sum_logits": -1.1947437524795532, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.1947437524795532, "logits_per_char": -0.5973718762397766, "bits_per_byte": 0.8618254434182805, "num_chars": 2}, {"sum_logits": -1.3812710046768188, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3812710046768188, "logits_per_char": -0.6906355023384094, "bits_per_byte": 0.9963764142861972, "num_chars": 2}, {"sum_logits": -2.1423068046569824, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -2.1423068046569824, "logits_per_char": -1.0711534023284912, "bits_per_byte": 1.5453477015717652, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 615, "native_id": "MEA_2011_8_13", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4406763315200806, "logits_per_token_corr": -1.4406763315200806, "logits_per_char_corr": -0.7203381657600403, "bits_per_byte_corr": 1.0392282995058089}, "model_output": [{"sum_logits": -1.4406763315200806, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.4406763315200806, "logits_per_char": -0.7203381657600403, "bits_per_byte": 1.0392282995058089, "num_chars": 2}, {"sum_logits": -1.2509046792984009, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": true, "logits_per_token": -1.2509046792984009, "logits_per_char": -0.6254523396492004, "bits_per_byte": 0.9023369887249493, "num_chars": 2}, {"sum_logits": -1.3574873208999634, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.3574873208999634, "logits_per_char": -0.6787436604499817, "bits_per_byte": 0.9792201129667145, "num_chars": 2}, {"sum_logits": -1.5717111825942993, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.5717111825942993, "logits_per_char": -0.7858555912971497, "bits_per_byte": 1.1337499644200764, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 616, "native_id": "TIMSS_2003_4_pg82", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0670253038406372, "logits_per_token_corr": -1.0670253038406372, "logits_per_char_corr": -0.5335126519203186, "bits_per_byte_corr": 0.7696960571775163}, "model_output": [{"sum_logits": -1.0670253038406372, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.0670253038406372, "logits_per_char": -0.5335126519203186, "bits_per_byte": 0.7696960571775163, "num_chars": 2}, {"sum_logits": -1.0973483324050903, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.0973483324050903, "logits_per_char": -0.5486741662025452, "bits_per_byte": 0.7915694986448675, "num_chars": 2}, {"sum_logits": -1.6020022630691528, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.6020022630691528, "logits_per_char": -0.8010011315345764, "bits_per_byte": 1.1556003602122118, "num_chars": 2}, {"sum_logits": -2.2078237533569336, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -2.2078237533569336, "logits_per_char": -1.1039118766784668, "bits_per_byte": 1.592608190063597, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 617, "native_id": "CSZ30338", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1338224411010742, "logits_per_token_corr": -1.1338224411010742, "logits_per_char_corr": -0.5669112205505371, "bits_per_byte_corr": 0.8178800065131568}, "model_output": [{"sum_logits": -1.6534090042114258, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.6534090042114258, "logits_per_char": -0.8267045021057129, "bits_per_byte": 1.1926824854693485, "num_chars": 2}, {"sum_logits": -1.5032072067260742, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.5032072067260742, "logits_per_char": -0.7516036033630371, "bits_per_byte": 1.0843347912869081, "num_chars": 2}, {"sum_logits": -1.1338224411010742, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.1338224411010742, "logits_per_char": -0.5669112205505371, "bits_per_byte": 0.8178800065131568, "num_chars": 2}, {"sum_logits": -1.435572624206543, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.435572624206543, "logits_per_char": -0.7177863121032715, "bits_per_byte": 1.0355467528901114, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 618, "native_id": "TIMSS_2003_8_pg85", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2575124502182007, "logits_per_token_corr": -1.2575124502182007, "logits_per_char_corr": -0.6287562251091003, "bits_per_byte_corr": 0.9071034878936155}, "model_output": [{"sum_logits": -1.4680207967758179, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4680207967758179, "logits_per_char": -0.7340103983879089, "bits_per_byte": 1.0589531617159293, "num_chars": 2}, {"sum_logits": -1.2575124502182007, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.2575124502182007, "logits_per_char": -0.6287562251091003, "bits_per_byte": 0.9071034878936155, "num_chars": 2}, {"sum_logits": -1.3484758138656616, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3484758138656616, "logits_per_char": -0.6742379069328308, "bits_per_byte": 0.9727196847120483, "num_chars": 2}, {"sum_logits": -1.5617612600326538, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.5617612600326538, "logits_per_char": -0.7808806300163269, "bits_per_byte": 1.1265726124516136, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 619, "native_id": "Mercury_7221988", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4726238250732422, "logits_per_token_corr": -1.4726238250732422, "logits_per_char_corr": -0.7363119125366211, "bits_per_byte_corr": 1.0622735447648146}, "model_output": [{"sum_logits": -1.1769733428955078, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.1769733428955078, "logits_per_char": -0.5884866714477539, "bits_per_byte": 0.8490068025275372, "num_chars": 2}, {"sum_logits": -1.2231616973876953, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.2231616973876953, "logits_per_char": -0.6115808486938477, "bits_per_byte": 0.8823246575139114, "num_chars": 2}, {"sum_logits": -1.4726238250732422, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4726238250732422, "logits_per_char": -0.7363119125366211, "bits_per_byte": 1.0622735447648146, "num_chars": 2}, {"sum_logits": -1.8658103942871094, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.8658103942871094, "logits_per_char": -0.9329051971435547, "bits_per_byte": 1.345897701539514, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 620, "native_id": "NCEOGA_2013_5_11", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7118518352508545, "logits_per_token_corr": -1.7118518352508545, "logits_per_char_corr": -0.8559259176254272, "bits_per_byte_corr": 1.2348400767274264}, "model_output": [{"sum_logits": -1.4936378002166748, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4936378002166748, "logits_per_char": -0.7468189001083374, "bits_per_byte": 1.0774319236292225, "num_chars": 2}, {"sum_logits": -1.3940513134002686, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3940513134002686, "logits_per_char": -0.6970256567001343, "bits_per_byte": 1.0055954582943794, "num_chars": 2}, {"sum_logits": -1.1006419658660889, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.1006419658660889, "logits_per_char": -0.5503209829330444, "bits_per_byte": 0.7939453529752135, "num_chars": 2}, {"sum_logits": -1.7118518352508545, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.7118518352508545, "logits_per_char": -0.8559259176254272, "bits_per_byte": 1.2348400767274264, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 621, "native_id": "MCAS_2013_8_29416", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2628523111343384, "logits_per_token_corr": -1.2628523111343384, "logits_per_char_corr": -0.6314261555671692, "bits_per_byte_corr": 0.9109553833249926}, "model_output": [{"sum_logits": -1.2321821451187134, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.2321821451187134, "logits_per_char": -0.6160910725593567, "bits_per_byte": 0.888831535117985, "num_chars": 2}, {"sum_logits": -1.2628523111343384, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.2628523111343384, "logits_per_char": -0.6314261555671692, "bits_per_byte": 0.9109553833249926, "num_chars": 2}, {"sum_logits": -1.432360291481018, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.432360291481018, "logits_per_char": -0.716180145740509, "bits_per_byte": 1.0332295446437099, "num_chars": 2}, {"sum_logits": -1.763170838356018, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.763170838356018, "logits_per_char": -0.881585419178009, "bits_per_byte": 1.2718589123690454, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 622, "native_id": "Mercury_SC_401142", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2595744132995605, "logits_per_token_corr": -1.2595744132995605, "logits_per_char_corr": -0.6297872066497803, "bits_per_byte_corr": 0.9085908798496035}, "model_output": [{"sum_logits": -1.4806761741638184, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4806761741638184, "logits_per_char": -0.7403380870819092, "bits_per_byte": 1.0680820868150593, "num_chars": 2}, {"sum_logits": -1.2595744132995605, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.2595744132995605, "logits_per_char": -0.6297872066497803, "bits_per_byte": 0.9085908798496035, "num_chars": 2}, {"sum_logits": -1.135322093963623, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.135322093963623, "logits_per_char": -0.5676610469818115, "bits_per_byte": 0.8189617773870848, "num_chars": 2}, {"sum_logits": -1.8855376243591309, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.8855376243591309, "logits_per_char": -0.9427688121795654, "bits_per_byte": 1.3601278900372147, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 623, "native_id": "Mercury_7206395", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1622675657272339, "logits_per_token_corr": -1.1622675657272339, "logits_per_char_corr": -0.5811337828636169, "bits_per_byte_corr": 0.8383988266309862}, "model_output": [{"sum_logits": -1.4711836576461792, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4711836576461792, "logits_per_char": -0.7355918288230896, "bits_per_byte": 1.061234683562277, "num_chars": 2}, {"sum_logits": -1.2166308164596558, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.2166308164596558, "logits_per_char": -0.6083154082298279, "bits_per_byte": 0.8776136227501485, "num_chars": 2}, {"sum_logits": -1.1622675657272339, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.1622675657272339, "logits_per_char": -0.5811337828636169, "bits_per_byte": 0.8383988266309862, "num_chars": 2}, {"sum_logits": -1.9099196195602417, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.9099196195602417, "logits_per_char": -0.9549598097801208, "bits_per_byte": 1.377715781819038, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 624, "native_id": "Mercury_179025", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6324864625930786, "logits_per_token_corr": -1.6324864625930786, "logits_per_char_corr": -0.8162432312965393, "bits_per_byte_corr": 1.1775900619515465}, "model_output": [{"sum_logits": -1.422369122505188, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.422369122505188, "logits_per_char": -0.711184561252594, "bits_per_byte": 1.0260224396766477, "num_chars": 2}, {"sum_logits": -1.6324864625930786, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.6324864625930786, "logits_per_char": -0.8162432312965393, "bits_per_byte": 1.1775900619515465, "num_chars": 2}, {"sum_logits": -1.0689622163772583, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.0689622163772583, "logits_per_char": -0.5344811081886292, "bits_per_byte": 0.7710932442331268, "num_chars": 2}, {"sum_logits": -1.5938950777053833, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.5938950777053833, "logits_per_char": -0.7969475388526917, "bits_per_byte": 1.1497522621522687, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 625, "native_id": "Mercury_7130620", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3661829233169556, "logits_per_token_corr": -1.3661829233169556, "logits_per_char_corr": -0.6830914616584778, "bits_per_byte_corr": 0.9854926642089874}, "model_output": [{"sum_logits": -1.3259302377700806, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3259302377700806, "logits_per_char": -0.6629651188850403, "bits_per_byte": 0.9564564892984968, "num_chars": 2}, {"sum_logits": -1.3661829233169556, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3661829233169556, "logits_per_char": -0.6830914616584778, "bits_per_byte": 0.9854926642089874, "num_chars": 2}, {"sum_logits": -1.3025480508804321, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.3025480508804321, "logits_per_char": -0.6512740254402161, "bits_per_byte": 0.9395898067630674, "num_chars": 2}, {"sum_logits": -1.6895776987075806, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.6895776987075806, "logits_per_char": -0.8447888493537903, "bits_per_byte": 1.2187726835618824, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 626, "native_id": "Mercury_177870", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.398494839668274, "logits_per_token_corr": -1.398494839668274, "logits_per_char_corr": -0.699247419834137, "bits_per_byte_corr": 1.008800784949837}, "model_output": [{"sum_logits": -1.2828656435012817, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.2828656435012817, "logits_per_char": -0.6414328217506409, "bits_per_byte": 0.9253919510037288, "num_chars": 2}, {"sum_logits": -1.1320744752883911, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.1320744752883911, "logits_per_char": -0.5660372376441956, "bits_per_byte": 0.8166191157083553, "num_chars": 2}, {"sum_logits": -1.398494839668274, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.398494839668274, "logits_per_char": -0.699247419834137, "bits_per_byte": 1.008800784949837, "num_chars": 2}, {"sum_logits": -1.9566422700881958, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.9566422700881958, "logits_per_char": -0.9783211350440979, "bits_per_byte": 1.411419049925996, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 627, "native_id": "Mercury_7282083", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2502630949020386, "logits_per_token_corr": -1.2502630949020386, "logits_per_char_corr": -0.6251315474510193, "bits_per_byte_corr": 0.9018741834114772}, "model_output": [{"sum_logits": -1.3497828245162964, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3497828245162964, "logits_per_char": -0.6748914122581482, "bits_per_byte": 0.9736624936040789, "num_chars": 2}, {"sum_logits": -1.2502630949020386, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.2502630949020386, "logits_per_char": -0.6251315474510193, "bits_per_byte": 0.9018741834114772, "num_chars": 2}, {"sum_logits": -1.3258455991744995, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3258455991744995, "logits_per_char": -0.6629227995872498, "bits_per_byte": 0.9563954354574404, "num_chars": 2}, {"sum_logits": -1.7509077787399292, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.7509077787399292, "logits_per_char": -0.8754538893699646, "bits_per_byte": 1.2630129847219105, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 628, "native_id": "Mercury_SC_400233", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2563519477844238, "logits_per_token_corr": -1.2563519477844238, "logits_per_char_corr": -0.6281759738922119, "bits_per_byte_corr": 0.9062663623405401}, "model_output": [{"sum_logits": -1.6831517219543457, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.6831517219543457, "logits_per_char": -0.8415758609771729, "bits_per_byte": 1.2141373211644992, "num_chars": 2}, {"sum_logits": -1.1938328742980957, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.1938328742980957, "logits_per_char": -0.5969164371490479, "bits_per_byte": 0.8611683837006586, "num_chars": 2}, {"sum_logits": -1.2563519477844238, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.2563519477844238, "logits_per_char": -0.6281759738922119, "bits_per_byte": 0.9062663623405401, "num_chars": 2}, {"sum_logits": -1.577709674835205, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.577709674835205, "logits_per_char": -0.7888548374176025, "bits_per_byte": 1.1380769619244624, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 629, "native_id": "Mercury_7082443", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0272842645645142, "logits_per_token_corr": -1.0272842645645142, "logits_per_char_corr": -0.5136421322822571, "bits_per_byte_corr": 0.7410289570357776}, "model_output": [{"sum_logits": -1.232699990272522, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.232699990272522, "logits_per_char": -0.616349995136261, "bits_per_byte": 0.8892050814356592, "num_chars": 2}, {"sum_logits": -1.0272842645645142, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.0272842645645142, "logits_per_char": -0.5136421322822571, "bits_per_byte": 0.7410289570357776, "num_chars": 2}, {"sum_logits": -1.5571609735488892, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.5571609735488892, "logits_per_char": -0.7785804867744446, "bits_per_byte": 1.1232542072032135, "num_chars": 2}, {"sum_logits": -2.04378080368042, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -2.04378080368042, "logits_per_char": -1.02189040184021, "bits_per_byte": 1.47427621506796, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 630, "native_id": "NCEOGA_2013_8_15", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2125123739242554, "logits_per_token_corr": -1.2125123739242554, "logits_per_char_corr": -0.6062561869621277, "bits_per_byte_corr": 0.8746427944391422}, "model_output": [{"sum_logits": -1.4738916158676147, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4738916158676147, "logits_per_char": -0.7369458079338074, "bits_per_byte": 1.0631880625107781, "num_chars": 2}, {"sum_logits": -1.2125123739242554, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.2125123739242554, "logits_per_char": -0.6062561869621277, "bits_per_byte": 0.8746427944391422, "num_chars": 2}, {"sum_logits": -1.2542985677719116, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.2542985677719116, "logits_per_char": -0.6271492838859558, "bits_per_byte": 0.9047851617599831, "num_chars": 2}, {"sum_logits": -1.7187951803207397, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.7187951803207397, "logits_per_char": -0.8593975901603699, "bits_per_byte": 1.239848641477182, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 631, "native_id": "Mercury_7210140", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5783982276916504, "logits_per_token_corr": -1.5783982276916504, "logits_per_char_corr": -0.7891991138458252, "bits_per_byte_corr": 1.1385736478201545}, "model_output": [{"sum_logits": -1.5783982276916504, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.5783982276916504, "logits_per_char": -0.7891991138458252, "bits_per_byte": 1.1385736478201545, "num_chars": 2}, {"sum_logits": -1.033825397491455, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": true, "logits_per_token": -1.033825397491455, "logits_per_char": -0.5169126987457275, "bits_per_byte": 0.7457473870535276, "num_chars": 2}, {"sum_logits": -1.3305115699768066, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.3305115699768066, "logits_per_char": -0.6652557849884033, "bits_per_byte": 0.9597612219261535, "num_chars": 2}, {"sum_logits": -1.8824830055236816, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.8824830055236816, "logits_per_char": -0.9412415027618408, "bits_per_byte": 1.3579244483143589, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 632, "native_id": "Mercury_7106593", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.853689193725586, "logits_per_token_corr": -1.853689193725586, "logits_per_char_corr": -0.926844596862793, "bits_per_byte_corr": 1.3371541035696426}, "model_output": [{"sum_logits": -1.501708984375, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.501708984375, "logits_per_char": -0.7508544921875, "bits_per_byte": 1.0832540523088854, "num_chars": 2}, {"sum_logits": -1.083028793334961, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": true, "logits_per_token": -1.083028793334961, "logits_per_char": -0.5415143966674805, "bits_per_byte": 0.7812401346427144, "num_chars": 2}, {"sum_logits": -1.3001651763916016, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.3001651763916016, "logits_per_char": -0.6500825881958008, "bits_per_byte": 0.9378709261590178, "num_chars": 2}, {"sum_logits": -1.853689193725586, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.853689193725586, "logits_per_char": -0.926844596862793, "bits_per_byte": 1.3371541035696426, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 633, "native_id": "Mercury_416536", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2095247507095337, "logits_per_token_corr": -1.2095247507095337, "logits_per_char_corr": -0.6047623753547668, "bits_per_byte_corr": 0.8724876798411788}, "model_output": [{"sum_logits": -1.4643503427505493, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4643503427505493, "logits_per_char": -0.7321751713752747, "bits_per_byte": 1.0563054888058945, "num_chars": 2}, {"sum_logits": -1.2095247507095337, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.2095247507095337, "logits_per_char": -0.6047623753547668, "bits_per_byte": 0.8724876798411788, "num_chars": 2}, {"sum_logits": -1.2952848672866821, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.2952848672866821, "logits_per_char": -0.6476424336433411, "bits_per_byte": 0.9343505272871789, "num_chars": 2}, {"sum_logits": -1.7027918100357056, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.7027918100357056, "logits_per_char": -0.8513959050178528, "bits_per_byte": 1.2283046500033097, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 634, "native_id": "Mercury_410026", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6310462951660156, "logits_per_token_corr": -1.6310462951660156, "logits_per_char_corr": -0.8155231475830078, "bits_per_byte_corr": 1.1765512007490089}, "model_output": [{"sum_logits": -1.6655712127685547, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.6655712127685547, "logits_per_char": -0.8327856063842773, "bits_per_byte": 1.2014556644551684, "num_chars": 2}, {"sum_logits": -1.1324634552001953, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": true, "logits_per_token": -1.1324634552001953, "logits_per_char": -0.5662317276000977, "bits_per_byte": 0.8168997054032382, "num_chars": 2}, {"sum_logits": -1.3122596740722656, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.3122596740722656, "logits_per_char": -0.6561298370361328, "bits_per_byte": 0.9465952620719926, "num_chars": 2}, {"sum_logits": -1.6310462951660156, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.6310462951660156, "logits_per_char": -0.8155231475830078, "bits_per_byte": 1.1765512007490089, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 635, "native_id": "ACTAAP_2011_5_1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0870959758758545, "logits_per_token_corr": -1.0870959758758545, "logits_per_char_corr": -0.5435479879379272, "bits_per_byte_corr": 0.7841739866837851}, "model_output": [{"sum_logits": -1.3430449962615967, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.3430449962615967, "logits_per_char": -0.6715224981307983, "bits_per_byte": 0.968802177899367, "num_chars": 2}, {"sum_logits": -1.0870959758758545, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": true, "logits_per_token": -1.0870959758758545, "logits_per_char": -0.5435479879379272, "bits_per_byte": 0.7841739866837851, "num_chars": 2}, {"sum_logits": -1.3641021251678467, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.3641021251678467, "logits_per_char": -0.6820510625839233, "bits_per_byte": 0.9839916856235812, "num_chars": 2}, {"sum_logits": -2.045677900314331, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -2.045677900314331, "logits_per_char": -1.0228389501571655, "bits_per_byte": 1.4756446810208765, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 636, "native_id": "Mercury_417138", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1288360357284546, "logits_per_token_corr": -1.1288360357284546, "logits_per_char_corr": -0.5644180178642273, "bits_per_byte_corr": 0.8142830753616841}, "model_output": [{"sum_logits": -1.369362235069275, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.369362235069275, "logits_per_char": -0.6846811175346375, "bits_per_byte": 0.9877860528582446, "num_chars": 2}, {"sum_logits": -1.1288360357284546, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": true, "logits_per_token": -1.1288360357284546, "logits_per_char": -0.5644180178642273, "bits_per_byte": 0.8142830753616841, "num_chars": 2}, {"sum_logits": -1.3743079900741577, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.3743079900741577, "logits_per_char": -0.6871539950370789, "bits_per_byte": 0.9913536609677452, "num_chars": 2}, {"sum_logits": -1.8577712774276733, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.8577712774276733, "logits_per_char": -0.9288856387138367, "bits_per_byte": 1.3400987045263923, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 637, "native_id": "Mercury_7138915", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.575373888015747, "logits_per_token_corr": -1.575373888015747, "logits_per_char_corr": -0.7876869440078735, "bits_per_byte_corr": 1.1363920478939582}, "model_output": [{"sum_logits": -2.0459110736846924, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": false, "logits_per_token": -2.0459110736846924, "logits_per_char": -1.0229555368423462, "bits_per_byte": 1.4758128800534205, "num_chars": 2}, {"sum_logits": -0.9780228734016418, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": true, "logits_per_token": -0.9780228734016418, "logits_per_char": -0.4890114367008209, "bits_per_byte": 0.7054943746667685, "num_chars": 2}, {"sum_logits": -1.2813637256622314, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": false, "logits_per_token": -1.2813637256622314, "logits_per_char": -0.6406818628311157, "bits_per_byte": 0.9243085462946178, "num_chars": 2}, {"sum_logits": -1.575373888015747, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": false, "logits_per_token": -1.575373888015747, "logits_per_char": -0.7876869440078735, "bits_per_byte": 1.1363920478939582, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 638, "native_id": "NYSEDREGENTS_2008_4_11", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2759208679199219, "logits_per_token_corr": -1.2759208679199219, "logits_per_char_corr": -0.6379604339599609, "bits_per_byte_corr": 0.9203823543580679}, "model_output": [{"sum_logits": -1.3736934661865234, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.3736934661865234, "logits_per_char": -0.6868467330932617, "bits_per_byte": 0.9909103756851461, "num_chars": 2}, {"sum_logits": -1.3502826690673828, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.3502826690673828, "logits_per_char": -0.6751413345336914, "bits_per_byte": 0.974023055231613, "num_chars": 2}, {"sum_logits": -1.2759208679199219, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": true, "logits_per_token": -1.2759208679199219, "logits_per_char": -0.6379604339599609, "bits_per_byte": 0.9203823543580679, "num_chars": 2}, {"sum_logits": -1.6546077728271484, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.6546077728271484, "logits_per_char": -0.8273038864135742, "bits_per_byte": 1.1935472142378873, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 639, "native_id": "Mercury_404435", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6405540704727173, "logits_per_token_corr": -1.6405540704727173, "logits_per_char_corr": -0.8202770352363586, "bits_per_byte_corr": 1.1834096108914463}, "model_output": [{"sum_logits": -1.8665558099746704, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.8665558099746704, "logits_per_char": -0.9332779049873352, "bits_per_byte": 1.346435405297437, "num_chars": 2}, {"sum_logits": -1.1936756372451782, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.1936756372451782, "logits_per_char": -0.5968378186225891, "bits_per_byte": 0.8610549611424145, "num_chars": 2}, {"sum_logits": -1.1319366693496704, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.1319366693496704, "logits_per_char": -0.5659683346748352, "bits_per_byte": 0.8165197097361565, "num_chars": 2}, {"sum_logits": -1.6405540704727173, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.6405540704727173, "logits_per_char": -0.8202770352363586, "bits_per_byte": 1.1834096108914463, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 640, "native_id": "MDSA_2009_5_25", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.244611382484436, "logits_per_token_corr": -1.244611382484436, "logits_per_char_corr": -0.622305691242218, "bits_per_byte_corr": 0.8977973346727713}, "model_output": [{"sum_logits": -1.3782764673233032, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.3782764673233032, "logits_per_char": -0.6891382336616516, "bits_per_byte": 0.9942163121913588, "num_chars": 2}, {"sum_logits": -1.244611382484436, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": true, "logits_per_token": -1.244611382484436, "logits_per_char": -0.622305691242218, "bits_per_byte": 0.8977973346727713, "num_chars": 2}, {"sum_logits": -1.3084160089492798, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.3084160089492798, "logits_per_char": -0.6542080044746399, "bits_per_byte": 0.9438226437661058, "num_chars": 2}, {"sum_logits": -1.744998574256897, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.744998574256897, "logits_per_char": -0.8724992871284485, "bits_per_byte": 1.2587503947202727, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 641, "native_id": "OHAT_2007_8_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.332472562789917, "logits_per_token_corr": -1.332472562789917, "logits_per_char_corr": -0.6662362813949585, "bits_per_byte_corr": 0.9611757792295011}, "model_output": [{"sum_logits": -1.6263339519500732, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.6263339519500732, "logits_per_char": -0.8131669759750366, "bits_per_byte": 1.173151963654703, "num_chars": 2}, {"sum_logits": -0.9695289134979248, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -0.9695289134979248, "logits_per_char": -0.4847644567489624, "bits_per_byte": 0.6993672777514629, "num_chars": 2}, {"sum_logits": -1.332472562789917, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.332472562789917, "logits_per_char": -0.6662362813949585, "bits_per_byte": 0.9611757792295011, "num_chars": 2}, {"sum_logits": -1.8950049877166748, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.8950049877166748, "logits_per_char": -0.9475024938583374, "bits_per_byte": 1.366957149120331, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 642, "native_id": "Mercury_LBS10302", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4744553565979004, "logits_per_token_corr": -1.4744553565979004, "logits_per_char_corr": -0.7372276782989502, "bits_per_byte_corr": 1.0635947154887437}, "model_output": [{"sum_logits": -1.3728032112121582, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3728032112121582, "logits_per_char": -0.6864016056060791, "bits_per_byte": 0.9902681924668238, "num_chars": 2}, {"sum_logits": -1.2097172737121582, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.2097172737121582, "logits_per_char": -0.6048586368560791, "bits_per_byte": 0.8726265558317506, "num_chars": 2}, {"sum_logits": -1.4744553565979004, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4744553565979004, "logits_per_char": -0.7372276782989502, "bits_per_byte": 1.0635947154887437, "num_chars": 2}, {"sum_logits": -1.6022686958312988, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.6022686958312988, "logits_per_char": -0.8011343479156494, "bits_per_byte": 1.1557925508245512, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 643, "native_id": "Mercury_7027248", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4215071201324463, "logits_per_token_corr": -1.4215071201324463, "logits_per_char_corr": -0.7107535600662231, "bits_per_byte_corr": 1.0254006364024528}, "model_output": [{"sum_logits": -1.5920794010162354, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5920794010162354, "logits_per_char": -0.7960397005081177, "bits_per_byte": 1.148442528274622, "num_chars": 2}, {"sum_logits": -1.4215071201324463, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4215071201324463, "logits_per_char": -0.7107535600662231, "bits_per_byte": 1.0254006364024528, "num_chars": 2}, {"sum_logits": -1.3346598148345947, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3346598148345947, "logits_per_char": -0.6673299074172974, "bits_per_byte": 0.9627535480685177, "num_chars": 2}, {"sum_logits": -1.3112585544586182, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.3112585544586182, "logits_per_char": -0.6556292772293091, "bits_per_byte": 0.9458731069210191, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 644, "native_id": "Mercury_SC_401360", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2951878309249878, "logits_per_token_corr": -1.2951878309249878, "logits_per_char_corr": -0.6475939154624939, "bits_per_byte_corr": 0.9342805303482777}, "model_output": [{"sum_logits": -1.2951878309249878, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.2951878309249878, "logits_per_char": -0.6475939154624939, "bits_per_byte": 0.9342805303482777, "num_chars": 2}, {"sum_logits": -1.2041081190109253, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": true, "logits_per_token": -1.2041081190109253, "logits_per_char": -0.6020540595054626, "bits_per_byte": 0.8685804059962239, "num_chars": 2}, {"sum_logits": -1.3504246473312378, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.3504246473312378, "logits_per_char": -0.6752123236656189, "bits_per_byte": 0.9741254709002019, "num_chars": 2}, {"sum_logits": -1.912150263786316, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.912150263786316, "logits_per_char": -0.956075131893158, "bits_per_byte": 1.3793248515005117, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 645, "native_id": "ACTAAP_2013_5_17", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4178768396377563, "logits_per_token_corr": -1.4178768396377563, "logits_per_char_corr": -0.7089384198188782, "bits_per_byte_corr": 1.0227819425690883}, "model_output": [{"sum_logits": -1.4178768396377563, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4178768396377563, "logits_per_char": -0.7089384198188782, "bits_per_byte": 1.0227819425690883, "num_chars": 2}, {"sum_logits": -0.9916244745254517, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -0.9916244745254517, "logits_per_char": -0.49581223726272583, "bits_per_byte": 0.7153058559115105, "num_chars": 2}, {"sum_logits": -1.4812484979629517, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4812484979629517, "logits_per_char": -0.7406242489814758, "bits_per_byte": 1.0684949311684557, "num_chars": 2}, {"sum_logits": -1.9118133783340454, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.9118133783340454, "logits_per_char": -0.9559066891670227, "bits_per_byte": 1.3790818400148421, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 646, "native_id": "Mercury_407125", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.363051176071167, "logits_per_token_corr": -1.363051176071167, "logits_per_char_corr": -0.6815255880355835, "bits_per_byte_corr": 0.9832335860985774}, "model_output": [{"sum_logits": -1.3546454906463623, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3546454906463623, "logits_per_char": -0.6773227453231812, "bits_per_byte": 0.9771701657597538, "num_chars": 2}, {"sum_logits": -1.100987195968628, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.100987195968628, "logits_per_char": -0.550493597984314, "bits_per_byte": 0.794194383853663, "num_chars": 2}, {"sum_logits": -1.363051176071167, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.363051176071167, "logits_per_char": -0.6815255880355835, "bits_per_byte": 0.9832335860985774, "num_chars": 2}, {"sum_logits": -1.9981753826141357, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.9981753826141357, "logits_per_char": -0.9990876913070679, "bits_per_byte": 1.4413788576629458, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 647, "native_id": "Mercury_404820", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.597503900527954, "logits_per_token_corr": -1.597503900527954, "logits_per_char_corr": -0.798751950263977, "bits_per_byte_corr": 1.1523554775470555}, "model_output": [{"sum_logits": -1.422027826309204, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.422027826309204, "logits_per_char": -0.711013913154602, "bits_per_byte": 1.0257762465119373, "num_chars": 2}, {"sum_logits": -1.335603952407837, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.335603952407837, "logits_per_char": -0.6678019762039185, "bits_per_byte": 0.9634345993659349, "num_chars": 2}, {"sum_logits": -1.504253625869751, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.504253625869751, "logits_per_char": -0.7521268129348755, "bits_per_byte": 1.0850896231415454, "num_chars": 2}, {"sum_logits": -1.597503900527954, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.597503900527954, "logits_per_char": -0.798751950263977, "bits_per_byte": 1.1523554775470555, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 648, "native_id": "Mercury_SC_416168", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.249258041381836, "logits_per_token_corr": -1.249258041381836, "logits_per_char_corr": -0.624629020690918, "bits_per_byte_corr": 0.9011491905467645}, "model_output": [{"sum_logits": -1.3423709869384766, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.3423709869384766, "logits_per_char": -0.6711854934692383, "bits_per_byte": 0.9683159829453775, "num_chars": 2}, {"sum_logits": -1.249258041381836, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.249258041381836, "logits_per_char": -0.624629020690918, "bits_per_byte": 0.9011491905467645, "num_chars": 2}, {"sum_logits": -1.2253551483154297, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": true, "logits_per_token": -1.2253551483154297, "logits_per_char": -0.6126775741577148, "bits_per_byte": 0.8839068979018504, "num_chars": 2}, {"sum_logits": -1.9214973449707031, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.9214973449707031, "logits_per_char": -0.9607486724853516, "bits_per_byte": 1.3860673453362673, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 649, "native_id": "TIMSS_1995_8_K18", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3010095357894897, "logits_per_token_corr": -1.3010095357894897, "logits_per_char_corr": -0.6505047678947449, "bits_per_byte_corr": 0.9384800027170488}, "model_output": [{"sum_logits": -1.3010095357894897, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3010095357894897, "logits_per_char": -0.6505047678947449, "bits_per_byte": 0.9384800027170488, "num_chars": 2}, {"sum_logits": -1.1288617849349976, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.1288617849349976, "logits_per_char": -0.5644308924674988, "bits_per_byte": 0.8143016494879772, "num_chars": 2}, {"sum_logits": -1.3926252126693726, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3926252126693726, "logits_per_char": -0.6963126063346863, "bits_per_byte": 1.0045667440682426, "num_chars": 2}, {"sum_logits": -1.9087671041488647, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.9087671041488647, "logits_per_char": -0.9543835520744324, "bits_per_byte": 1.3768844176847665, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 650, "native_id": "Mercury_SC_405130", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5075287818908691, "logits_per_token_corr": -1.5075287818908691, "logits_per_char_corr": -0.7537643909454346, "bits_per_byte_corr": 1.0874521488164497}, "model_output": [{"sum_logits": -1.5075287818908691, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.5075287818908691, "logits_per_char": -0.7537643909454346, "bits_per_byte": 1.0874521488164497, "num_chars": 2}, {"sum_logits": -1.1314187049865723, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": true, "logits_per_token": -1.1314187049865723, "logits_per_char": -0.5657093524932861, "bits_per_byte": 0.8161460774271568, "num_chars": 2}, {"sum_logits": -1.275585651397705, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.275585651397705, "logits_per_char": -0.6377928256988525, "bits_per_byte": 0.9201405467509547, "num_chars": 2}, {"sum_logits": -1.8327393531799316, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.8327393531799316, "logits_per_char": -0.9163696765899658, "bits_per_byte": 1.3220419880383167, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 651, "native_id": "Mercury_SC_408631", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5352054834365845, "logits_per_token_corr": -1.5352054834365845, "logits_per_char_corr": -0.7676027417182922, "bits_per_byte_corr": 1.1074166688505476}, "model_output": [{"sum_logits": -1.2116791009902954, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.2116791009902954, "logits_per_char": -0.6058395504951477, "bits_per_byte": 0.8740417150743762, "num_chars": 2}, {"sum_logits": -0.9895874261856079, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": true, "logits_per_token": -0.9895874261856079, "logits_per_char": -0.49479371309280396, "bits_per_byte": 0.7138364361425377, "num_chars": 2}, {"sum_logits": -1.5352054834365845, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.5352054834365845, "logits_per_char": -0.7676027417182922, "bits_per_byte": 1.1074166688505476, "num_chars": 2}, {"sum_logits": -2.2523322105407715, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -2.2523322105407715, "logits_per_char": -1.1261661052703857, "bits_per_byte": 1.624714255291991, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 652, "native_id": "Mercury_SC_408763", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2470232248306274, "logits_per_token_corr": -1.2470232248306274, "logits_per_char_corr": -0.6235116124153137, "bits_per_byte_corr": 0.8995371111689008}, "model_output": [{"sum_logits": -1.5358625650405884, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.5358625650405884, "logits_per_char": -0.7679312825202942, "bits_per_byte": 1.107890653036326, "num_chars": 2}, {"sum_logits": -1.1028467416763306, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.1028467416763306, "logits_per_char": -0.5514233708381653, "bits_per_byte": 0.7955357625390684, "num_chars": 2}, {"sum_logits": -1.2470232248306274, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.2470232248306274, "logits_per_char": -0.6235116124153137, "bits_per_byte": 0.8995371111689008, "num_chars": 2}, {"sum_logits": -1.8912359476089478, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.8912359476089478, "logits_per_char": -0.9456179738044739, "bits_per_byte": 1.3642383613841642, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 653, "native_id": "MCAS_8_2015_18", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.439068078994751, "logits_per_token_corr": -1.439068078994751, "logits_per_char_corr": -0.7195340394973755, "bits_per_byte_corr": 1.038068190534413}, "model_output": [{"sum_logits": -1.439068078994751, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.439068078994751, "logits_per_char": -0.7195340394973755, "bits_per_byte": 1.038068190534413, "num_chars": 2}, {"sum_logits": -1.284588098526001, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.284588098526001, "logits_per_char": -0.6422940492630005, "bits_per_byte": 0.9266344396648881, "num_chars": 2}, {"sum_logits": -1.4004595279693604, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4004595279693604, "logits_per_char": -0.7002297639846802, "bits_per_byte": 1.010218007984273, "num_chars": 2}, {"sum_logits": -1.5343515872955322, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.5343515872955322, "logits_per_char": -0.7671757936477661, "bits_per_byte": 1.106800712986482, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 654, "native_id": "Mercury_411729", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0704293251037598, "logits_per_token_corr": -1.0704293251037598, "logits_per_char_corr": -0.5352146625518799, "bits_per_byte_corr": 0.7721515394752119}, "model_output": [{"sum_logits": -1.714134693145752, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.714134693145752, "logits_per_char": -0.857067346572876, "bits_per_byte": 1.236486810609439, "num_chars": 2}, {"sum_logits": -1.0704293251037598, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.0704293251037598, "logits_per_char": -0.5352146625518799, "bits_per_byte": 0.7721515394752119, "num_chars": 2}, {"sum_logits": -1.5215630531311035, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.5215630531311035, "logits_per_char": -0.7607815265655518, "bits_per_byte": 1.0975757355768452, "num_chars": 2}, {"sum_logits": -1.7129387855529785, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.7129387855529785, "logits_per_char": -0.8564693927764893, "bits_per_byte": 1.2356241456327106, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 655, "native_id": "MDSA_2012_8_6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4471217393875122, "logits_per_token_corr": -1.4471217393875122, "logits_per_char_corr": -0.7235608696937561, "bits_per_byte_corr": 1.0438776784892374}, "model_output": [{"sum_logits": -1.1741496324539185, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.1741496324539185, "logits_per_char": -0.5870748162269592, "bits_per_byte": 0.8469699260020421, "num_chars": 2}, {"sum_logits": -1.2051326036453247, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.2051326036453247, "logits_per_char": -0.6025663018226624, "bits_per_byte": 0.8693194154469819, "num_chars": 2}, {"sum_logits": -1.4471217393875122, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4471217393875122, "logits_per_char": -0.7235608696937561, "bits_per_byte": 1.0438776784892374, "num_chars": 2}, {"sum_logits": -1.9577780961990356, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.9577780961990356, "logits_per_char": -0.9788890480995178, "bits_per_byte": 1.412238375274707, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 656, "native_id": "MCAS_1999_8_5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1700265407562256, "logits_per_token_corr": -1.1700265407562256, "logits_per_char_corr": -0.5850132703781128, "bits_per_byte_corr": 0.843995744029344}, "model_output": [{"sum_logits": -1.323545217514038, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.323545217514038, "logits_per_char": -0.661772608757019, "bits_per_byte": 0.9547360608505895, "num_chars": 2}, {"sum_logits": -1.1700265407562256, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.1700265407562256, "logits_per_char": -0.5850132703781128, "bits_per_byte": 0.843995744029344, "num_chars": 2}, {"sum_logits": -1.4402978420257568, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4402978420257568, "logits_per_char": -0.7201489210128784, "bits_per_byte": 1.0389552770475639, "num_chars": 2}, {"sum_logits": -1.842609167098999, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.842609167098999, "logits_per_char": -0.9213045835494995, "bits_per_byte": 1.3291615538360895, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 657, "native_id": "WASL_2004_8_17", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.671072244644165, "logits_per_token_corr": -1.671072244644165, "logits_per_char_corr": -0.8355361223220825, "bits_per_byte_corr": 1.2054238201585288}, "model_output": [{"sum_logits": -1.3301050662994385, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": false, "logits_per_token": -1.3301050662994385, "logits_per_char": -0.6650525331497192, "bits_per_byte": 0.9594679915064322, "num_chars": 2}, {"sum_logits": -1.2034361362457275, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": true, "logits_per_token": -1.2034361362457275, "logits_per_char": -0.6017180681228638, "bits_per_byte": 0.8680956728947666, "num_chars": 2}, {"sum_logits": -1.4663355350494385, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": false, "logits_per_token": -1.4663355350494385, "logits_per_char": -0.7331677675247192, "bits_per_byte": 1.0577375023483047, "num_chars": 2}, {"sum_logits": -1.671072244644165, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": false, "logits_per_token": -1.671072244644165, "logits_per_char": -0.8355361223220825, "bits_per_byte": 1.2054238201585288, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 658, "native_id": "Mercury_414365", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1954759359359741, "logits_per_token_corr": -1.1954759359359741, "logits_per_char_corr": -0.5977379679679871, "bits_per_byte_corr": 0.8623536021390805}, "model_output": [{"sum_logits": -1.5232652425765991, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.5232652425765991, "logits_per_char": -0.7616326212882996, "bits_per_byte": 1.098803605712681, "num_chars": 2}, {"sum_logits": -1.356284499168396, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.356284499168396, "logits_per_char": -0.678142249584198, "bits_per_byte": 0.9783524604931111, "num_chars": 2}, {"sum_logits": -1.1954759359359741, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.1954759359359741, "logits_per_char": -0.5977379679679871, "bits_per_byte": 0.8623536021390805, "num_chars": 2}, {"sum_logits": -1.5732873678207397, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.5732873678207397, "logits_per_char": -0.7866436839103699, "bits_per_byte": 1.1348869417249312, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 659, "native_id": "Mercury_SC_415406", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1379340887069702, "logits_per_token_corr": -1.1379340887069702, "logits_per_char_corr": -0.5689670443534851, "bits_per_byte_corr": 0.8208459333186136}, "model_output": [{"sum_logits": -1.237892508506775, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.237892508506775, "logits_per_char": -0.6189462542533875, "bits_per_byte": 0.892950691588803, "num_chars": 2}, {"sum_logits": -1.1379340887069702, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.1379340887069702, "logits_per_char": -0.5689670443534851, "bits_per_byte": 0.8208459333186136, "num_chars": 2}, {"sum_logits": -1.418157935142517, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.418157935142517, "logits_per_char": -0.7090789675712585, "bits_per_byte": 1.0229847101144558, "num_chars": 2}, {"sum_logits": -2.0031285285949707, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -2.0031285285949707, "logits_per_char": -1.0015642642974854, "bits_per_byte": 1.4449517972346233, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 660, "native_id": "MCAS_2000_8_29", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2779653072357178, "logits_per_token_corr": -1.2779653072357178, "logits_per_char_corr": -0.6389826536178589, "bits_per_byte_corr": 0.9218571055892175}, "model_output": [{"sum_logits": -1.2949368953704834, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.2949368953704834, "logits_per_char": -0.6474684476852417, "bits_per_byte": 0.9340995186082446, "num_chars": 2}, {"sum_logits": -1.2779653072357178, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": true, "logits_per_token": -1.2779653072357178, "logits_per_char": -0.6389826536178589, "bits_per_byte": 0.9218571055892175, "num_chars": 2}, {"sum_logits": -1.2820641994476318, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.2820641994476318, "logits_per_char": -0.6410320997238159, "bits_per_byte": 0.9248138313228531, "num_chars": 2}, {"sum_logits": -1.866513967514038, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.866513967514038, "logits_per_char": -0.933256983757019, "bits_per_byte": 1.3464052223422105, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 661, "native_id": "Mercury_416230", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.343496322631836, "logits_per_token_corr": -1.343496322631836, "logits_per_char_corr": -0.671748161315918, "bits_per_byte_corr": 0.9691277410574505}, "model_output": [{"sum_logits": -1.9583816528320312, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.9583816528320312, "logits_per_char": -0.9791908264160156, "bits_per_byte": 1.4126737493553665, "num_chars": 2}, {"sum_logits": -1.0975875854492188, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": true, "logits_per_token": -1.0975875854492188, "logits_per_char": -0.5487937927246094, "bits_per_byte": 0.7917420832350085, "num_chars": 2}, {"sum_logits": -1.343496322631836, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.343496322631836, "logits_per_char": -0.671748161315918, "bits_per_byte": 0.9691277410574505, "num_chars": 2}, {"sum_logits": -1.3894176483154297, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.3894176483154297, "logits_per_char": -0.6947088241577148, "bits_per_byte": 1.0022529754748581, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 662, "native_id": "Mercury_7001295", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2487256526947021, "logits_per_token_corr": -1.2487256526947021, "logits_per_char_corr": -0.6243628263473511, "bits_per_byte_corr": 0.9007651532873876}, "model_output": [{"sum_logits": -1.2330949306488037, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.2330949306488037, "logits_per_char": -0.6165474653244019, "bits_per_byte": 0.8894899706968137, "num_chars": 2}, {"sum_logits": -1.3613946437835693, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3613946437835693, "logits_per_char": -0.6806973218917847, "bits_per_byte": 0.9820386506403816, "num_chars": 2}, {"sum_logits": -1.2487256526947021, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.2487256526947021, "logits_per_char": -0.6243628263473511, "bits_per_byte": 0.9007651532873876, "num_chars": 2}, {"sum_logits": -1.8888452053070068, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.8888452053070068, "logits_per_char": -0.9444226026535034, "bits_per_byte": 1.3625138053526362, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 663, "native_id": "MSA_2012_5_2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3813515901565552, "logits_per_token_corr": -1.3813515901565552, "logits_per_char_corr": -0.6906757950782776, "bits_per_byte_corr": 0.9964345444221888}, "model_output": [{"sum_logits": -1.3813515901565552, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3813515901565552, "logits_per_char": -0.6906757950782776, "bits_per_byte": 0.9964345444221888, "num_chars": 2}, {"sum_logits": -1.2310353517532349, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2310353517532349, "logits_per_char": -0.6155176758766174, "bits_per_byte": 0.8880042985673343, "num_chars": 2}, {"sum_logits": -1.394363522529602, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.394363522529602, "logits_per_char": -0.697181761264801, "bits_per_byte": 1.0058206695756842, "num_chars": 2}, {"sum_logits": -1.6398812532424927, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.6398812532424927, "logits_per_char": -0.8199406266212463, "bits_per_byte": 1.182924275850711, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 664, "native_id": "MCAS_2005_8_7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.441864013671875, "logits_per_token_corr": -1.441864013671875, "logits_per_char_corr": -0.7209320068359375, "bits_per_byte_corr": 1.0400850310810825}, "model_output": [{"sum_logits": -1.3311042785644531, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3311042785644531, "logits_per_char": -0.6655521392822266, "bits_per_byte": 0.9601887707961988, "num_chars": 2}, {"sum_logits": -0.996826171875, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -0.996826171875, "logits_per_char": -0.4984130859375, "bits_per_byte": 0.7190580873967126, "num_chars": 2}, {"sum_logits": -1.441864013671875, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.441864013671875, "logits_per_char": -0.7209320068359375, "bits_per_byte": 1.0400850310810825, "num_chars": 2}, {"sum_logits": -2.1169605255126953, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -2.1169605255126953, "logits_per_char": -1.0584802627563477, "bits_per_byte": 1.5270642259585268, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 665, "native_id": "Mercury_7206553", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4368160963058472, "logits_per_token_corr": -1.4368160963058472, "logits_per_char_corr": -0.7184080481529236, "bits_per_byte_corr": 1.036443728405687}, "model_output": [{"sum_logits": -1.4723767042160034, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4723767042160034, "logits_per_char": -0.7361883521080017, "bits_per_byte": 1.0620952847471952, "num_chars": 2}, {"sum_logits": -1.3537758588790894, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.3537758588790894, "logits_per_char": -0.6768879294395447, "bits_per_byte": 0.9765428590407312, "num_chars": 2}, {"sum_logits": -1.3568028211593628, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3568028211593628, "logits_per_char": -0.6784014105796814, "bits_per_byte": 0.978726350776087, "num_chars": 2}, {"sum_logits": -1.4368160963058472, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4368160963058472, "logits_per_char": -0.7184080481529236, "bits_per_byte": 1.036443728405687, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 666, "native_id": "VASoL_2010_3_39", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3159942626953125, "logits_per_token_corr": -1.3159942626953125, "logits_per_char_corr": -0.6579971313476562, "bits_per_byte_corr": 0.9492891983151096}, "model_output": [{"sum_logits": -1.3159942626953125, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3159942626953125, "logits_per_char": -0.6579971313476562, "bits_per_byte": 0.9492891983151096, "num_chars": 2}, {"sum_logits": -1.2260589599609375, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.2260589599609375, "logits_per_char": -0.6130294799804688, "bits_per_byte": 0.8844145906871977, "num_chars": 2}, {"sum_logits": -1.3707523345947266, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3707523345947266, "logits_per_char": -0.6853761672973633, "bits_per_byte": 0.9887887977041009, "num_chars": 2}, {"sum_logits": -1.7797927856445312, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.7797927856445312, "logits_per_char": -0.8898963928222656, "bits_per_byte": 1.283849112830582, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 667, "native_id": "Mercury_416380", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8695027828216553, "logits_per_token_corr": -1.8695027828216553, "logits_per_char_corr": -0.9347513914108276, "bits_per_byte_corr": 1.3485611968534283}, "model_output": [{"sum_logits": -1.8695027828216553, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.8695027828216553, "logits_per_char": -0.9347513914108276, "bits_per_byte": 1.3485611968534283, "num_chars": 2}, {"sum_logits": -1.3233106136322021, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.3233106136322021, "logits_per_char": -0.6616553068161011, "bits_per_byte": 0.9545668299221404, "num_chars": 2}, {"sum_logits": -0.9959466457366943, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": true, "logits_per_token": -0.9959466457366943, "logits_per_char": -0.49797332286834717, "bits_per_byte": 0.7184236433976793, "num_chars": 2}, {"sum_logits": -1.722104787826538, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.722104787826538, "logits_per_char": -0.861052393913269, "bits_per_byte": 1.242236018645136, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 668, "native_id": "OHAT_2008_5_34", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4486467838287354, "logits_per_token_corr": -1.4486467838287354, "logits_per_char_corr": -0.7243233919143677, "bits_per_byte_corr": 1.0449777655154822}, "model_output": [{"sum_logits": -1.215454339981079, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.215454339981079, "logits_per_char": -0.6077271699905396, "bits_per_byte": 0.8767649743594653, "num_chars": 2}, {"sum_logits": -1.04085373878479, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.04085373878479, "logits_per_char": -0.520426869392395, "bits_per_byte": 0.7508172636183159, "num_chars": 2}, {"sum_logits": -1.4486467838287354, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4486467838287354, "logits_per_char": -0.7243233919143677, "bits_per_byte": 1.0449777655154822, "num_chars": 2}, {"sum_logits": -2.228060007095337, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -2.228060007095337, "logits_per_char": -1.1140300035476685, "bits_per_byte": 1.6072055615208902, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 669, "native_id": "Mercury_7268328", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3043594360351562, "logits_per_token_corr": -1.3043594360351562, "logits_per_char_corr": -0.6521797180175781, "bits_per_byte_corr": 0.9408964449529985}, "model_output": [{"sum_logits": -1.4554309844970703, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4554309844970703, "logits_per_char": -0.7277154922485352, "bits_per_byte": 1.0498715318457867, "num_chars": 2}, {"sum_logits": -1.3360824584960938, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.3360824584960938, "logits_per_char": -0.6680412292480469, "bits_per_byte": 0.9637797685462168, "num_chars": 2}, {"sum_logits": -1.3043594360351562, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.3043594360351562, "logits_per_char": -0.6521797180175781, "bits_per_byte": 0.9408964449529985, "num_chars": 2}, {"sum_logits": -1.5035133361816406, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.5035133361816406, "logits_per_char": -0.7517566680908203, "bits_per_byte": 1.084555617010616, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 670, "native_id": "NYSEDREGENTS_2008_8_36", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6258444786071777, "logits_per_token_corr": -1.6258444786071777, "logits_per_char_corr": -0.8129222393035889, "bits_per_byte_corr": 1.1727988832724814}, "model_output": [{"sum_logits": -1.017453670501709, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.017453670501709, "logits_per_char": -0.5087268352508545, "bits_per_byte": 0.7339376823840718, "num_chars": 2}, {"sum_logits": -1.1145415306091309, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.1145415306091309, "logits_per_char": -0.5572707653045654, "bits_per_byte": 0.8039717695378715, "num_chars": 2}, {"sum_logits": -1.6258444786071777, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.6258444786071777, "logits_per_char": -0.8129222393035889, "bits_per_byte": 1.1727988832724814, "num_chars": 2}, {"sum_logits": -2.2481741905212402, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -2.2481741905212402, "logits_per_char": -1.1240870952606201, "bits_per_byte": 1.6217148778609416, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 671, "native_id": "Mercury_SC_414156", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.616304874420166, "logits_per_token_corr": -1.616304874420166, "logits_per_char_corr": -0.808152437210083, "bits_per_byte_corr": 1.1659175134461537}, "model_output": [{"sum_logits": -1.1817402839660645, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.1817402839660645, "logits_per_char": -0.5908701419830322, "bits_per_byte": 0.8524454236488908, "num_chars": 2}, {"sum_logits": -0.9750238060951233, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": true, "logits_per_token": -0.9750238060951233, "logits_per_char": -0.48751190304756165, "bits_per_byte": 0.7033310049015636, "num_chars": 2}, {"sum_logits": -1.616304874420166, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.616304874420166, "logits_per_char": -0.808152437210083, "bits_per_byte": 1.1659175134461537, "num_chars": 2}, {"sum_logits": -2.2323689460754395, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -2.2323689460754395, "logits_per_char": -1.1161844730377197, "bits_per_byte": 1.610313803969936, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 672, "native_id": "Mercury_7094133", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4900503158569336, "logits_per_token_corr": -1.4900503158569336, "logits_per_char_corr": -0.7450251579284668, "bits_per_byte_corr": 1.074844100681688}, "model_output": [{"sum_logits": -1.559626579284668, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.559626579284668, "logits_per_char": -0.779813289642334, "bits_per_byte": 1.1250327657871124, "num_chars": 2}, {"sum_logits": -1.1868162155151367, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.1868162155151367, "logits_per_char": -0.5934081077575684, "bits_per_byte": 0.8561069342857626, "num_chars": 2}, {"sum_logits": -1.4043874740600586, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4043874740600586, "logits_per_char": -0.7021937370300293, "bits_per_byte": 1.01305142215724, "num_chars": 2}, {"sum_logits": -1.4900503158569336, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4900503158569336, "logits_per_char": -0.7450251579284668, "bits_per_byte": 1.074844100681688, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 673, "native_id": "MEA_2013_5_15", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.490685224533081, "logits_per_token_corr": -1.490685224533081, "logits_per_char_corr": -0.7453426122665405, "bits_per_byte_corr": 1.075302090480936}, "model_output": [{"sum_logits": -1.5339934825897217, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.5339934825897217, "logits_per_char": -0.7669967412948608, "bits_per_byte": 1.1065423950448858, "num_chars": 2}, {"sum_logits": -1.1113440990447998, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.1113440990447998, "logits_per_char": -0.5556720495223999, "bits_per_byte": 0.8016653102071488, "num_chars": 2}, {"sum_logits": -1.490685224533081, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.490685224533081, "logits_per_char": -0.7453426122665405, "bits_per_byte": 1.075302090480936, "num_chars": 2}, {"sum_logits": -1.8270061016082764, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.8270061016082764, "logits_per_char": -0.9135030508041382, "bits_per_byte": 1.3179063212330158, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 674, "native_id": "OHAT_2010_8_35", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.717242956161499, "logits_per_token_corr": -1.717242956161499, "logits_per_char_corr": -0.8586214780807495, "bits_per_byte_corr": 1.2387289484287392}, "model_output": [{"sum_logits": -1.717242956161499, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.717242956161499, "logits_per_char": -0.8586214780807495, "bits_per_byte": 1.2387289484287392, "num_chars": 2}, {"sum_logits": -1.2636277675628662, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.2636277675628662, "logits_per_char": -0.6318138837814331, "bits_per_byte": 0.9115147568969243, "num_chars": 2}, {"sum_logits": -1.2216336727142334, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.2216336727142334, "logits_per_char": -0.6108168363571167, "bits_per_byte": 0.8812224207045308, "num_chars": 2}, {"sum_logits": -1.4629247188568115, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4629247188568115, "logits_per_char": -0.7314623594284058, "bits_per_byte": 1.0552771185450596, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 675, "native_id": "Mercury_SC_416174", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4998856782913208, "logits_per_token_corr": -1.4998856782913208, "logits_per_char_corr": -0.7499428391456604, "bits_per_byte_corr": 1.081938814986411}, "model_output": [{"sum_logits": -1.3918153047561646, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3918153047561646, "logits_per_char": -0.6959076523780823, "bits_per_byte": 1.0039825190032612, "num_chars": 2}, {"sum_logits": -1.0859166383743286, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.0859166383743286, "logits_per_char": -0.5429583191871643, "bits_per_byte": 0.7833232745012916, "num_chars": 2}, {"sum_logits": -1.4998856782913208, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4998856782913208, "logits_per_char": -0.7499428391456604, "bits_per_byte": 1.081938814986411, "num_chars": 2}, {"sum_logits": -1.7440835237503052, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.7440835237503052, "logits_per_char": -0.8720417618751526, "bits_per_byte": 1.2580903253062607, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 676, "native_id": "TIMSS_1995_8_J6", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0977451801300049, "logits_per_token_corr": -1.0977451801300049, "logits_per_char_corr": -0.5488725900650024, "bits_per_byte_corr": 0.7918557637672289}, "model_output": [{"sum_logits": -1.2940399646759033, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.2940399646759033, "logits_per_char": -0.6470199823379517, "bits_per_byte": 0.9334525198756982, "num_chars": 2}, {"sum_logits": -1.0977451801300049, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.0977451801300049, "logits_per_char": -0.5488725900650024, "bits_per_byte": 0.7918557637672289, "num_chars": 2}, {"sum_logits": -1.3733704090118408, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.3733704090118408, "logits_per_char": -0.6866852045059204, "bits_per_byte": 0.9906773391932268, "num_chars": 2}, {"sum_logits": -2.0549938678741455, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -2.0549938678741455, "logits_per_char": -1.0274969339370728, "bits_per_byte": 1.4823647311206947, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 677, "native_id": "Mercury_SC_401587", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4567383527755737, "logits_per_token_corr": -1.4567383527755737, "logits_per_char_corr": -0.7283691763877869, "bits_per_byte_corr": 1.0508145987117936}, "model_output": [{"sum_logits": -1.4567383527755737, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4567383527755737, "logits_per_char": -0.7283691763877869, "bits_per_byte": 1.0508145987117936, "num_chars": 2}, {"sum_logits": -1.4112271070480347, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4112271070480347, "logits_per_char": -0.7056135535240173, "bits_per_byte": 1.0179851744538704, "num_chars": 2}, {"sum_logits": -1.2633122205734253, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2633122205734253, "logits_per_char": -0.6316561102867126, "bits_per_byte": 0.9112871378585072, "num_chars": 2}, {"sum_logits": -1.5001744031906128, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5001744031906128, "logits_per_char": -0.7500872015953064, "bits_per_byte": 1.0821470859766062, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 678, "native_id": "MDSA_2011_5_23", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9354255199432373, "logits_per_token_corr": -1.9354255199432373, "logits_per_char_corr": -0.9677127599716187, "bits_per_byte_corr": 1.3961143998170291}, "model_output": [{"sum_logits": -1.3553359508514404, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.3553359508514404, "logits_per_char": -0.6776679754257202, "bits_per_byte": 0.9776682275166528, "num_chars": 2}, {"sum_logits": -1.0280139446258545, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.0280139446258545, "logits_per_char": -0.5140069723129272, "bits_per_byte": 0.7415553099387436, "num_chars": 2}, {"sum_logits": -1.477928876876831, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.477928876876831, "logits_per_char": -0.7389644384384155, "bits_per_byte": 1.0661003307291657, "num_chars": 2}, {"sum_logits": -1.9354255199432373, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.9354255199432373, "logits_per_char": -0.9677127599716187, "bits_per_byte": 1.3961143998170291, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 679, "native_id": "AIMS_2008_8_11", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2763243913650513, "logits_per_token_corr": -1.2763243913650513, "logits_per_char_corr": -0.6381621956825256, "bits_per_byte_corr": 0.9206734349946535}, "model_output": [{"sum_logits": -1.5736056566238403, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.5736056566238403, "logits_per_char": -0.7868028283119202, "bits_per_byte": 1.1351165385638333, "num_chars": 2}, {"sum_logits": -1.221497654914856, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.221497654914856, "logits_per_char": -0.610748827457428, "bits_per_byte": 0.8811243046022136, "num_chars": 2}, {"sum_logits": -1.2763243913650513, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.2763243913650513, "logits_per_char": -0.6381621956825256, "bits_per_byte": 0.9206734349946535, "num_chars": 2}, {"sum_logits": -1.6129924058914185, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.6129924058914185, "logits_per_char": -0.8064962029457092, "bits_per_byte": 1.1635280724863897, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 680, "native_id": "Mercury_7159215", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8409961462020874, "logits_per_token_corr": -1.8409961462020874, "logits_per_char_corr": -0.9204980731010437, "bits_per_byte_corr": 1.3279980052116764}, "model_output": [{"sum_logits": -1.592129111289978, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.592129111289978, "logits_per_char": -0.796064555644989, "bits_per_byte": 1.148478386657327, "num_chars": 2}, {"sum_logits": -1.0353530645370483, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.0353530645370483, "logits_per_char": -0.5176765322685242, "bits_per_byte": 0.7468493658889318, "num_chars": 2}, {"sum_logits": -1.3105148077011108, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3105148077011108, "logits_per_char": -0.6552574038505554, "bits_per_byte": 0.9453366070416522, "num_chars": 2}, {"sum_logits": -1.8409961462020874, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.8409961462020874, "logits_per_char": -0.9204980731010437, "bits_per_byte": 1.3279980052116764, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 681, "native_id": "MCAS_2006_9_30", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3146799802780151, "logits_per_token_corr": -1.3146799802780151, "logits_per_char_corr": -0.6573399901390076, "bits_per_byte_corr": 0.9483411439522277}, "model_output": [{"sum_logits": -1.3360899686813354, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3360899686813354, "logits_per_char": -0.6680449843406677, "bits_per_byte": 0.963785185999719, "num_chars": 2}, {"sum_logits": -1.3146799802780151, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3146799802780151, "logits_per_char": -0.6573399901390076, "bits_per_byte": 0.9483411439522277, "num_chars": 2}, {"sum_logits": -1.2975844144821167, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2975844144821167, "logits_per_char": -0.6487922072410583, "bits_per_byte": 0.9360092999547519, "num_chars": 2}, {"sum_logits": -1.7617663145065308, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.7617663145065308, "logits_per_char": -0.8808831572532654, "bits_per_byte": 1.270845762572812, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 682, "native_id": "MCAS_1999_4_27", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5481499433517456, "logits_per_token_corr": -1.5481499433517456, "logits_per_char_corr": -0.7740749716758728, "bits_per_byte_corr": 1.116754122913849}, "model_output": [{"sum_logits": -1.5481499433517456, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5481499433517456, "logits_per_char": -0.7740749716758728, "bits_per_byte": 1.116754122913849, "num_chars": 2}, {"sum_logits": -1.3321388959884644, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3321388959884644, "logits_per_char": -0.6660694479942322, "bits_per_byte": 0.9609350895096185, "num_chars": 2}, {"sum_logits": -1.2835205793380737, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2835205793380737, "logits_per_char": -0.6417602896690369, "bits_per_byte": 0.9258643873456494, "num_chars": 2}, {"sum_logits": -1.4826706647872925, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4826706647872925, "logits_per_char": -0.7413353323936462, "bits_per_byte": 1.0695208076808531, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 683, "native_id": "Mercury_7016538", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4451038837432861, "logits_per_token_corr": -1.4451038837432861, "logits_per_char_corr": -0.7225519418716431, "bits_per_byte_corr": 1.042422103323659}, "model_output": [{"sum_logits": -1.323106050491333, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.323106050491333, "logits_per_char": -0.6615530252456665, "bits_per_byte": 0.9544192688077, "num_chars": 2}, {"sum_logits": -1.2167961597442627, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.2167961597442627, "logits_per_char": -0.6083980798721313, "bits_per_byte": 0.877732892718522, "num_chars": 2}, {"sum_logits": -1.4451038837432861, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4451038837432861, "logits_per_char": -0.7225519418716431, "bits_per_byte": 1.042422103323659, "num_chars": 2}, {"sum_logits": -1.7157223224639893, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.7157223224639893, "logits_per_char": -0.8578611612319946, "bits_per_byte": 1.2376320430815353, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 684, "native_id": "Mercury_SC_409266", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2677401304244995, "logits_per_token_corr": -1.2677401304244995, "logits_per_char_corr": -0.6338700652122498, "bits_per_byte_corr": 0.9144811996503336}, "model_output": [{"sum_logits": -1.2677401304244995, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.2677401304244995, "logits_per_char": -0.6338700652122498, "bits_per_byte": 0.9144811996503336, "num_chars": 2}, {"sum_logits": -1.247085452079773, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.247085452079773, "logits_per_char": -0.6235427260398865, "bits_per_byte": 0.899581998640776, "num_chars": 2}, {"sum_logits": -1.393527865409851, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.393527865409851, "logits_per_char": -0.6967639327049255, "bits_per_byte": 1.0052178703844097, "num_chars": 2}, {"sum_logits": -1.8493040800094604, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.8493040800094604, "logits_per_char": -0.9246520400047302, "bits_per_byte": 1.333990912663646, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 685, "native_id": "OHAT_2007_5_15", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2889916896820068, "logits_per_token_corr": -1.2889916896820068, "logits_per_char_corr": -0.6444958448410034, "bits_per_byte_corr": 0.9298109592263265}, "model_output": [{"sum_logits": -1.257354497909546, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.257354497909546, "logits_per_char": -0.628677248954773, "bits_per_byte": 0.9069895493874188, "num_chars": 2}, {"sum_logits": -1.2889916896820068, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.2889916896820068, "logits_per_char": -0.6444958448410034, "bits_per_byte": 0.9298109592263265, "num_chars": 2}, {"sum_logits": -1.3360555171966553, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3360555171966553, "logits_per_char": -0.6680277585983276, "bits_per_byte": 0.9637603345066693, "num_chars": 2}, {"sum_logits": -1.886432409286499, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.886432409286499, "logits_per_char": -0.9432162046432495, "bits_per_byte": 1.3607733409259033, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 686, "native_id": "Mercury_7230073", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.124998927116394, "logits_per_token_corr": -1.124998927116394, "logits_per_char_corr": -0.562499463558197, "bits_per_byte_corr": 0.811515186578696}, "model_output": [{"sum_logits": -1.5535286664962769, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.5535286664962769, "logits_per_char": -0.7767643332481384, "bits_per_byte": 1.1206340515173165, "num_chars": 2}, {"sum_logits": -1.124998927116394, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.124998927116394, "logits_per_char": -0.562499463558197, "bits_per_byte": 0.811515186578696, "num_chars": 2}, {"sum_logits": -1.3932923078536987, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3932923078536987, "logits_per_char": -0.6966461539268494, "bits_per_byte": 1.0050479515253572, "num_chars": 2}, {"sum_logits": -1.6206520795822144, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.6206520795822144, "logits_per_char": -0.8103260397911072, "bits_per_byte": 1.169053359110663, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 687, "native_id": "Mercury_7245840", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3011568784713745, "logits_per_token_corr": -1.3011568784713745, "logits_per_char_corr": -0.6505784392356873, "bits_per_byte_corr": 0.9385862879952821}, "model_output": [{"sum_logits": -1.543735384941101, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.543735384941101, "logits_per_char": -0.7718676924705505, "bits_per_byte": 1.1135696921504707, "num_chars": 2}, {"sum_logits": -1.3011568784713745, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.3011568784713745, "logits_per_char": -0.6505784392356873, "bits_per_byte": 0.9385862879952821, "num_chars": 2}, {"sum_logits": -1.431339144706726, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.431339144706726, "logits_per_char": -0.715669572353363, "bits_per_byte": 1.0324929429500638, "num_chars": 2}, {"sum_logits": -1.3371771574020386, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3371771574020386, "logits_per_char": -0.6685885787010193, "bits_per_byte": 0.9645694268876539, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 688, "native_id": "Mercury_SC_401788", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3045604228973389, "logits_per_token_corr": -1.3045604228973389, "logits_per_char_corr": -0.6522802114486694, "bits_per_byte_corr": 0.941041426327676}, "model_output": [{"sum_logits": -1.3802039623260498, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3802039623260498, "logits_per_char": -0.6901019811630249, "bits_per_byte": 0.9956067059322602, "num_chars": 2}, {"sum_logits": -1.3045604228973389, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3045604228973389, "logits_per_char": -0.6522802114486694, "bits_per_byte": 0.941041426327676, "num_chars": 2}, {"sum_logits": -1.2867801189422607, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.2867801189422607, "logits_per_char": -0.6433900594711304, "bits_per_byte": 0.9282156481569219, "num_chars": 2}, {"sum_logits": -1.6735122203826904, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.6735122203826904, "logits_per_char": -0.8367561101913452, "bits_per_byte": 1.2071838906074601, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 689, "native_id": "ACTAAP_2014_7_5", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7077670097351074, "logits_per_token_corr": -1.7077670097351074, "logits_per_char_corr": -0.8538835048675537, "bits_per_byte_corr": 1.2318934979701919}, "model_output": [{"sum_logits": -1.5568041801452637, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.5568041801452637, "logits_per_char": -0.7784020900726318, "bits_per_byte": 1.122996835166197, "num_chars": 2}, {"sum_logits": -1.298102855682373, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.298102855682373, "logits_per_char": -0.6490514278411865, "bits_per_byte": 0.9363832762290534, "num_chars": 2}, {"sum_logits": -1.1363654136657715, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.1363654136657715, "logits_per_char": -0.5681827068328857, "bits_per_byte": 0.819714373467261, "num_chars": 2}, {"sum_logits": -1.7077670097351074, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.7077670097351074, "logits_per_char": -0.8538835048675537, "bits_per_byte": 1.2318934979701919, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 690, "native_id": "MCAS_2004_5_11", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.0426011085510254, "logits_per_token_corr": -2.0426011085510254, "logits_per_char_corr": -1.0213005542755127, "bits_per_byte_corr": 1.4734252449114904}, "model_output": [{"sum_logits": -1.6068960428237915, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.6068960428237915, "logits_per_char": -0.8034480214118958, "bits_per_byte": 1.1591304761038244, "num_chars": 2}, {"sum_logits": -0.9597269296646118, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -0.9597269296646118, "logits_per_char": -0.4798634648323059, "bits_per_byte": 0.6922966410178606, "num_chars": 2}, {"sum_logits": -1.2753950357437134, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.2753950357437134, "logits_per_char": -0.6376975178718567, "bits_per_byte": 0.9200030466215897, "num_chars": 2}, {"sum_logits": -2.0426011085510254, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -2.0426011085510254, "logits_per_char": -1.0213005542755127, "bits_per_byte": 1.4734252449114904, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 691, "native_id": "NCEOGA_2013_8_7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4553221464157104, "logits_per_token_corr": -1.4553221464157104, "logits_per_char_corr": -0.7276610732078552, "bits_per_byte_corr": 1.049793021765668}, "model_output": [{"sum_logits": -1.4553221464157104, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4553221464157104, "logits_per_char": -0.7276610732078552, "bits_per_byte": 1.049793021765668, "num_chars": 2}, {"sum_logits": -1.1098212003707886, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.1098212003707886, "logits_per_char": -0.5549106001853943, "bits_per_byte": 0.8005667710247618, "num_chars": 2}, {"sum_logits": -1.3835428953170776, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3835428953170776, "logits_per_char": -0.6917714476585388, "bits_per_byte": 0.99801523696627, "num_chars": 2}, {"sum_logits": -1.7369707822799683, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.7369707822799683, "logits_per_char": -0.8684853911399841, "bits_per_byte": 1.2529595668830669, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 692, "native_id": "LEAP__7_10339", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6783279180526733, "logits_per_token_corr": -1.6783279180526733, "logits_per_char_corr": -0.8391639590263367, "bits_per_byte_corr": 1.210657682180915}, "model_output": [{"sum_logits": -1.6783279180526733, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -1.6783279180526733, "logits_per_char": -0.8391639590263367, "bits_per_byte": 1.210657682180915, "num_chars": 2}, {"sum_logits": -1.2204574346542358, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -1.2204574346542358, "logits_per_char": -0.6102287173271179, "bits_per_byte": 0.8803739442964986, "num_chars": 2}, {"sum_logits": -1.0245822668075562, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": true, "logits_per_token": -1.0245822668075562, "logits_per_char": -0.5122911334037781, "bits_per_byte": 0.739079877653548, "num_chars": 2}, {"sum_logits": -1.9056094884872437, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -1.9056094884872437, "logits_per_char": -0.9528047442436218, "bits_per_byte": 1.374606679456738, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 693, "native_id": "Mercury_7018270", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3001524209976196, "logits_per_token_corr": -1.3001524209976196, "logits_per_char_corr": -0.6500762104988098, "bits_per_byte_corr": 0.9378617250871966}, "model_output": [{"sum_logits": -1.2461515665054321, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2461515665054321, "logits_per_char": -0.6230757832527161, "bits_per_byte": 0.8989083425973459, "num_chars": 2}, {"sum_logits": -1.3739839792251587, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3739839792251587, "logits_per_char": -0.6869919896125793, "bits_per_byte": 0.9911199365452226, "num_chars": 2}, {"sum_logits": -1.3001524209976196, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3001524209976196, "logits_per_char": -0.6500762104988098, "bits_per_byte": 0.9378617250871966, "num_chars": 2}, {"sum_logits": -1.797524094581604, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.797524094581604, "logits_per_char": -0.898762047290802, "bits_per_byte": 1.2966395485665836, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 694, "native_id": "Mercury_7034808", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4568819999694824, "logits_per_token_corr": -1.4568819999694824, "logits_per_char_corr": -0.7284409999847412, "bits_per_byte_corr": 1.0509182182589387}, "model_output": [{"sum_logits": -1.4568819999694824, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4568819999694824, "logits_per_char": -0.7284409999847412, "bits_per_byte": 1.0509182182589387, "num_chars": 2}, {"sum_logits": -1.229295253753662, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.229295253753662, "logits_per_char": -0.614647626876831, "bits_per_byte": 0.8867490831900112, "num_chars": 2}, {"sum_logits": -1.3377947807312012, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3377947807312012, "logits_per_char": -0.6688973903656006, "bits_per_byte": 0.9650149479447143, "num_chars": 2}, {"sum_logits": -1.6020960807800293, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.6020960807800293, "logits_per_char": -0.8010480403900146, "bits_per_byte": 1.1556680353853264, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 695, "native_id": "Mercury_7216300", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4126826524734497, "logits_per_token_corr": -1.4126826524734497, "logits_per_char_corr": -0.7063413262367249, "bits_per_byte_corr": 1.0190351285373884}, "model_output": [{"sum_logits": -1.48188316822052, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.48188316822052, "logits_per_char": -0.74094158411026, "bits_per_byte": 1.0689527489850528, "num_chars": 2}, {"sum_logits": -1.1584712266921997, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.1584712266921997, "logits_per_char": -0.5792356133460999, "bits_per_byte": 0.8356603468812956, "num_chars": 2}, {"sum_logits": -1.4126826524734497, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4126826524734497, "logits_per_char": -0.7063413262367249, "bits_per_byte": 1.0190351285373884, "num_chars": 2}, {"sum_logits": -1.669425129890442, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.669425129890442, "logits_per_char": -0.834712564945221, "bits_per_byte": 1.2042356780150423, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 696, "native_id": "Mercury_SC_400985", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4655811786651611, "logits_per_token_corr": -1.4655811786651611, "logits_per_char_corr": -0.7327905893325806, "bits_per_byte_corr": 1.0571933492409744}, "model_output": [{"sum_logits": -1.1527912616729736, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.1527912616729736, "logits_per_char": -0.5763956308364868, "bits_per_byte": 0.8315631181984626, "num_chars": 2}, {"sum_logits": -1.111985445022583, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.111985445022583, "logits_per_char": -0.5559927225112915, "bits_per_byte": 0.80212794353797, "num_chars": 2}, {"sum_logits": -1.4655811786651611, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4655811786651611, "logits_per_char": -0.7327905893325806, "bits_per_byte": 1.0571933492409744, "num_chars": 2}, {"sum_logits": -2.2001583576202393, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -2.2001583576202393, "logits_per_char": -1.1000791788101196, "bits_per_byte": 1.587078775855703, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 697, "native_id": "Mercury_7188528", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3413276672363281, "logits_per_token_corr": -1.3413276672363281, "logits_per_char_corr": -0.6706638336181641, "bits_per_byte_corr": 0.9675633868652013}, "model_output": [{"sum_logits": -1.3413276672363281, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3413276672363281, "logits_per_char": -0.6706638336181641, "bits_per_byte": 0.9675633868652013, "num_chars": 2}, {"sum_logits": -1.3072700500488281, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3072700500488281, "logits_per_char": -0.6536350250244141, "bits_per_byte": 0.9429960091547331, "num_chars": 2}, {"sum_logits": -1.2964706420898438, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.2964706420898438, "logits_per_char": -0.6482353210449219, "bits_per_byte": 0.9352058830012457, "num_chars": 2}, {"sum_logits": -1.7640743255615234, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.7640743255615234, "logits_per_char": -0.8820371627807617, "bits_per_byte": 1.2725106406244906, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 698, "native_id": "TIMSS_1995_8_R2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.9197224378585815, "logits_per_token_corr": -0.9197224378585815, "logits_per_char_corr": -0.45986121892929077, "bits_per_byte_corr": 0.6634395000469183}, "model_output": [{"sum_logits": -1.5157402753829956, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.5157402753829956, "logits_per_char": -0.7578701376914978, "bits_per_byte": 1.0933754892861454, "num_chars": 2}, {"sum_logits": -0.9197224378585815, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": true, "logits_per_token": -0.9197224378585815, "logits_per_char": -0.45986121892929077, "bits_per_byte": 0.6634395000469183, "num_chars": 2}, {"sum_logits": -1.407601237297058, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.407601237297058, "logits_per_char": -0.703800618648529, "bits_per_byte": 1.015369662299547, "num_chars": 2}, {"sum_logits": -2.0562429428100586, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -2.0562429428100586, "logits_per_char": -1.0281214714050293, "bits_per_byte": 1.4832657482285656, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 699, "native_id": "Mercury_SC_400032", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2979958057403564, "logits_per_token_corr": -1.2979958057403564, "logits_per_char_corr": -0.6489979028701782, "bits_per_byte_corr": 0.936306056018816}, "model_output": [{"sum_logits": -1.4506771564483643, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4506771564483643, "logits_per_char": -0.7253385782241821, "bits_per_byte": 1.0464423697702308, "num_chars": 2}, {"sum_logits": -1.3729546070098877, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3729546070098877, "logits_per_char": -0.6864773035049438, "bits_per_byte": 0.9903774014501219, "num_chars": 2}, {"sum_logits": -1.2979958057403564, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.2979958057403564, "logits_per_char": -0.6489979028701782, "bits_per_byte": 0.936306056018816, "num_chars": 2}, {"sum_logits": -1.4966976642608643, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4966976642608643, "logits_per_char": -0.7483488321304321, "bits_per_byte": 1.0796391489703974, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 700, "native_id": "Mercury_7252245", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7541370391845703, "logits_per_token_corr": -1.7541370391845703, "logits_per_char_corr": -0.8770685195922852, "bits_per_byte_corr": 1.2653424037365235}, "model_output": [{"sum_logits": -1.5391311645507812, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.5391311645507812, "logits_per_char": -0.7695655822753906, "bits_per_byte": 1.1102484491883313, "num_chars": 2}, {"sum_logits": -1.3202590942382812, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3202590942382812, "logits_per_char": -0.6601295471191406, "bits_per_byte": 0.9523656239737457, "num_chars": 2}, {"sum_logits": -1.1074028015136719, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.1074028015136719, "logits_per_char": -0.5537014007568359, "bits_per_byte": 0.7988222650057336, "num_chars": 2}, {"sum_logits": -1.7541370391845703, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.7541370391845703, "logits_per_char": -0.8770685195922852, "bits_per_byte": 1.2653424037365235, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 701, "native_id": "MCAS_2002_8_17", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.110970973968506, "logits_per_token_corr": -2.110970973968506, "logits_per_char_corr": -1.055485486984253, "bits_per_byte_corr": 1.5227436778035481}, "model_output": [{"sum_logits": -1.03639554977417, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.03639554977417, "logits_per_char": -0.518197774887085, "bits_per_byte": 0.74760136002983, "num_chars": 2}, {"sum_logits": -1.2424941062927246, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.2424941062927246, "logits_per_char": -0.6212470531463623, "bits_per_byte": 0.8962700427417831, "num_chars": 2}, {"sum_logits": -1.5370879173278809, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.5370879173278809, "logits_per_char": -0.7685439586639404, "bits_per_byte": 1.108774557870436, "num_chars": 2}, {"sum_logits": -2.110970973968506, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -2.110970973968506, "logits_per_char": -1.055485486984253, "bits_per_byte": 1.5227436778035481, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 702, "native_id": "MDSA_2007_8_30", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2421327829360962, "logits_per_token_corr": -1.2421327829360962, "logits_per_char_corr": -0.6210663914680481, "bits_per_byte_corr": 0.8960094030344004}, "model_output": [{"sum_logits": -1.3865362405776978, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3865362405776978, "logits_per_char": -0.6932681202888489, "bits_per_byte": 1.0001744791478542, "num_chars": 2}, {"sum_logits": -1.3951059579849243, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3951059579849243, "logits_per_char": -0.6975529789924622, "bits_per_byte": 1.0063562235504715, "num_chars": 2}, {"sum_logits": -1.2421327829360962, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2421327829360962, "logits_per_char": -0.6210663914680481, "bits_per_byte": 0.8960094030344004, "num_chars": 2}, {"sum_logits": -1.6110731363296509, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.6110731363296509, "logits_per_char": -0.8055365681648254, "bits_per_byte": 1.162143612146943, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 703, "native_id": "NCEOGA_2013_5_35", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.802732229232788, "logits_per_token_corr": -1.802732229232788, "logits_per_char_corr": -0.901366114616394, "bits_per_byte_corr": 1.300396423583359}, "model_output": [{"sum_logits": -1.39164137840271, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.39164137840271, "logits_per_char": -0.695820689201355, "bits_per_byte": 1.0038570576594568, "num_chars": 2}, {"sum_logits": -1.1590230464935303, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.1590230464935303, "logits_per_char": -0.5795115232467651, "bits_per_byte": 0.836058400726718, "num_chars": 2}, {"sum_logits": -1.354736089706421, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.354736089706421, "logits_per_char": -0.6773680448532104, "bits_per_byte": 0.9772355191670817, "num_chars": 2}, {"sum_logits": -1.802732229232788, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.802732229232788, "logits_per_char": -0.901366114616394, "bits_per_byte": 1.300396423583359, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 704, "native_id": "Mercury_7082758", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.258658766746521, "logits_per_token_corr": -1.258658766746521, "logits_per_char_corr": -0.6293293833732605, "bits_per_byte_corr": 0.9079303804789645}, "model_output": [{"sum_logits": -1.2764276266098022, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.2764276266098022, "logits_per_char": -0.6382138133049011, "bits_per_byte": 0.920747903482477, "num_chars": 2}, {"sum_logits": -1.258658766746521, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.258658766746521, "logits_per_char": -0.6293293833732605, "bits_per_byte": 0.9079303804789645, "num_chars": 2}, {"sum_logits": -1.3585981130599976, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3585981130599976, "logits_per_char": -0.6792990565299988, "bits_per_byte": 0.980021380137085, "num_chars": 2}, {"sum_logits": -1.8422235250473022, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.8422235250473022, "logits_per_char": -0.9211117625236511, "bits_per_byte": 1.3288833718983186, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 705, "native_id": "Mercury_7094308", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.459634780883789, "logits_per_token_corr": -1.459634780883789, "logits_per_char_corr": -0.7298173904418945, "bits_per_byte_corr": 1.052903929945802}, "model_output": [{"sum_logits": -1.240499496459961, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.240499496459961, "logits_per_char": -0.6202497482299805, "bits_per_byte": 0.8948312358846638, "num_chars": 2}, {"sum_logits": -1.1047477722167969, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": true, "logits_per_token": -1.1047477722167969, "logits_per_char": -0.5523738861083984, "bits_per_byte": 0.796907066205724, "num_chars": 2}, {"sum_logits": -1.459634780883789, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.459634780883789, "logits_per_char": -0.7298173904418945, "bits_per_byte": 1.052903929945802, "num_chars": 2}, {"sum_logits": -2.0158920288085938, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -2.0158920288085938, "logits_per_char": -1.0079460144042969, "bits_per_byte": 1.4541587164659195, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 706, "native_id": "Mercury_7136028", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.293798804283142, "logits_per_token_corr": -1.293798804283142, "logits_per_char_corr": -0.646899402141571, "bits_per_byte_corr": 0.9332785594243503}, "model_output": [{"sum_logits": -1.293798804283142, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.293798804283142, "logits_per_char": -0.646899402141571, "bits_per_byte": 0.9332785594243503, "num_chars": 2}, {"sum_logits": -1.2065471410751343, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.2065471410751343, "logits_per_char": -0.6032735705375671, "bits_per_byte": 0.8703397885145516, "num_chars": 2}, {"sum_logits": -1.5343917608261108, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.5343917608261108, "logits_per_char": -0.7671958804130554, "bits_per_byte": 1.1068296920631524, "num_chars": 2}, {"sum_logits": -1.6604312658309937, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.6604312658309937, "logits_per_char": -0.8302156329154968, "bits_per_byte": 1.1977479764765397, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 707, "native_id": "Mercury_7159075", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.314894437789917, "logits_per_token_corr": -1.314894437789917, "logits_per_char_corr": -0.6574472188949585, "bits_per_byte_corr": 0.9484958423466788}, "model_output": [{"sum_logits": -1.1651179790496826, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": true, "logits_per_token": -1.1651179790496826, "logits_per_char": -0.5825589895248413, "bits_per_byte": 0.8404549652133779, "num_chars": 2}, {"sum_logits": -1.314894437789917, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.314894437789917, "logits_per_char": -0.6574472188949585, "bits_per_byte": 0.9484958423466788, "num_chars": 2}, {"sum_logits": -1.371952772140503, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.371952772140503, "logits_per_char": -0.6859763860702515, "bits_per_byte": 0.9896547303511958, "num_chars": 2}, {"sum_logits": -1.990278959274292, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.990278959274292, "logits_per_char": -0.995139479637146, "bits_per_byte": 1.4356827922663655, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 708, "native_id": "MCAS_2015_5_19", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3045997619628906, "logits_per_token_corr": -1.3045997619628906, "logits_per_char_corr": -0.6522998809814453, "bits_per_byte_corr": 0.9410698034650683}, "model_output": [{"sum_logits": -1.3769683837890625, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3769683837890625, "logits_per_char": -0.6884841918945312, "bits_per_byte": 0.9932727293773993, "num_chars": 2}, {"sum_logits": -1.3045997619628906, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.3045997619628906, "logits_per_char": -0.6522998809814453, "bits_per_byte": 0.9410698034650683, "num_chars": 2}, {"sum_logits": -1.4101886749267578, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4101886749267578, "logits_per_char": -0.7050943374633789, "bits_per_byte": 1.0172361040180369, "num_chars": 2}, {"sum_logits": -1.559316635131836, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.559316635131836, "logits_per_char": -0.779658317565918, "bits_per_byte": 1.1248091883409905, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 709, "native_id": "MSA_2012_5_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4560847282409668, "logits_per_token_corr": -1.4560847282409668, "logits_per_char_corr": -0.7280423641204834, "bits_per_byte_corr": 1.050343108274453}, "model_output": [{"sum_logits": -1.4560847282409668, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.4560847282409668, "logits_per_char": -0.7280423641204834, "bits_per_byte": 1.050343108274453, "num_chars": 2}, {"sum_logits": -0.9817652106285095, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": true, "logits_per_token": -0.9817652106285095, "logits_per_char": -0.49088260531425476, "bits_per_byte": 0.7081939003460385, "num_chars": 2}, {"sum_logits": -1.4363932609558105, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.4363932609558105, "logits_per_char": -0.7181966304779053, "bits_per_byte": 1.0361387171743817, "num_chars": 2}, {"sum_logits": -1.9880805015563965, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.9880805015563965, "logits_per_char": -0.9940402507781982, "bits_per_byte": 1.4340969402427584, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 710, "native_id": "MCAS_2014_5_13", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9525866508483887, "logits_per_token_corr": -1.9525866508483887, "logits_per_char_corr": -0.9762933254241943, "bits_per_byte_corr": 1.408493539043492}, "model_output": [{"sum_logits": -1.350635051727295, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.350635051727295, "logits_per_char": -0.6753175258636475, "bits_per_byte": 0.9742772455895884, "num_chars": 2}, {"sum_logits": -1.0992789268493652, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.0992789268493652, "logits_per_char": -0.5496394634246826, "bits_per_byte": 0.79296212816023, "num_chars": 2}, {"sum_logits": -1.3676142692565918, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.3676142692565918, "logits_per_char": -0.6838071346282959, "bits_per_byte": 0.986525162053443, "num_chars": 2}, {"sum_logits": -1.9525866508483887, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.9525866508483887, "logits_per_char": -0.9762933254241943, "bits_per_byte": 1.408493539043492, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 711, "native_id": "Mercury_SC_400392", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3410165309906006, "logits_per_token_corr": -1.3410165309906006, "logits_per_char_corr": -0.6705082654953003, "bits_per_byte_corr": 0.9673389495058251}, "model_output": [{"sum_logits": -1.4578301906585693, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4578301906585693, "logits_per_char": -0.7289150953292847, "bits_per_byte": 1.0516021932614206, "num_chars": 2}, {"sum_logits": -1.1047379970550537, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.1047379970550537, "logits_per_char": -0.5523689985275269, "bits_per_byte": 0.7969000149170387, "num_chars": 2}, {"sum_logits": -1.3410165309906006, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3410165309906006, "logits_per_char": -0.6705082654953003, "bits_per_byte": 0.9673389495058251, "num_chars": 2}, {"sum_logits": -1.8023240566253662, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.8023240566253662, "logits_per_char": -0.9011620283126831, "bits_per_byte": 1.3001019892850816, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 712, "native_id": "Mercury_7159320", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5994460582733154, "logits_per_token_corr": -1.5994460582733154, "logits_per_char_corr": -0.7997230291366577, "bits_per_byte_corr": 1.153756448220985}, "model_output": [{"sum_logits": -1.5994460582733154, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.5994460582733154, "logits_per_char": -0.7997230291366577, "bits_per_byte": 1.153756448220985, "num_chars": 2}, {"sum_logits": -1.1692798137664795, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.1692798137664795, "logits_per_char": -0.5846399068832397, "bits_per_byte": 0.8434570943668412, "num_chars": 2}, {"sum_logits": -1.3162829875946045, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3162829875946045, "logits_per_char": -0.6581414937973022, "bits_per_byte": 0.9494974693053045, "num_chars": 2}, {"sum_logits": -1.594029188156128, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.594029188156128, "logits_per_char": -0.797014594078064, "bits_per_byte": 1.1498490023933792, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 713, "native_id": "Mercury_7218365", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5698045492172241, "logits_per_token_corr": -1.5698045492172241, "logits_per_char_corr": -0.7849022746086121, "bits_per_byte_corr": 1.1323746191611255}, "model_output": [{"sum_logits": -1.2779687643051147, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.2779687643051147, "logits_per_char": -0.6389843821525574, "bits_per_byte": 0.921859599337655, "num_chars": 2}, {"sum_logits": -1.5698045492172241, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.5698045492172241, "logits_per_char": -0.7849022746086121, "bits_per_byte": 1.1323746191611255, "num_chars": 2}, {"sum_logits": -1.1615575551986694, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.1615575551986694, "logits_per_char": -0.5807787775993347, "bits_per_byte": 0.8378866622967164, "num_chars": 2}, {"sum_logits": -1.914166808128357, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.914166808128357, "logits_per_char": -0.9570834040641785, "bits_per_byte": 1.3807794807615101, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 714, "native_id": "MCAS_2004_9_10-v1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.195529818534851, "logits_per_token_corr": -1.195529818534851, "logits_per_char_corr": -0.5977649092674255, "bits_per_byte_corr": 0.8623924702181756}, "model_output": [{"sum_logits": -1.7732466459274292, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.7732466459274292, "logits_per_char": -0.8866233229637146, "bits_per_byte": 1.2791270711771638, "num_chars": 2}, {"sum_logits": -1.3023508787155151, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.3023508787155151, "logits_per_char": -0.6511754393577576, "bits_per_byte": 0.9394475771108037, "num_chars": 2}, {"sum_logits": -1.195529818534851, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": true, "logits_per_token": -1.195529818534851, "logits_per_char": -0.5977649092674255, "bits_per_byte": 0.8623924702181756, "num_chars": 2}, {"sum_logits": -1.9584769010543823, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.9584769010543823, "logits_per_char": -0.9792384505271912, "bits_per_byte": 1.4127424564243862, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 715, "native_id": "AIMS_2009_4_12", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.3072972297668457, "logits_per_token_corr": -2.3072972297668457, "logits_per_char_corr": -1.1536486148834229, "bits_per_byte_corr": 1.6643631356219315}, "model_output": [{"sum_logits": -1.070202350616455, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.070202350616455, "logits_per_char": -0.5351011753082275, "bits_per_byte": 0.7719878119915903, "num_chars": 2}, {"sum_logits": -1.1573128700256348, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.1573128700256348, "logits_per_char": -0.5786564350128174, "bits_per_byte": 0.8348247691720782, "num_chars": 2}, {"sum_logits": -1.4624981880187988, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.4624981880187988, "logits_per_char": -0.7312490940093994, "bits_per_byte": 1.0549694415826658, "num_chars": 2}, {"sum_logits": -2.3072972297668457, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -2.3072972297668457, "logits_per_char": -1.1536486148834229, "bits_per_byte": 1.6643631356219315, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 716, "native_id": "Mercury_SC_414274", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0115288496017456, "logits_per_token_corr": -1.0115288496017456, "logits_per_char_corr": -0.5057644248008728, "bits_per_byte_corr": 0.7296638275188024}, "model_output": [{"sum_logits": -1.244364619255066, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.244364619255066, "logits_per_char": -0.622182309627533, "bits_per_byte": 0.8976193326291282, "num_chars": 2}, {"sum_logits": -1.0115288496017456, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.0115288496017456, "logits_per_char": -0.5057644248008728, "bits_per_byte": 0.7296638275188024, "num_chars": 2}, {"sum_logits": -1.4728859663009644, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4728859663009644, "logits_per_char": -0.7364429831504822, "bits_per_byte": 1.0624626396894383, "num_chars": 2}, {"sum_logits": -2.227667808532715, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -2.227667808532715, "logits_per_char": -1.1138339042663574, "bits_per_byte": 1.6069226500602207, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 717, "native_id": "MCAS_2005_9_6", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1387436389923096, "logits_per_token_corr": -1.1387436389923096, "logits_per_char_corr": -0.5693718194961548, "bits_per_byte_corr": 0.8214299004096187}, "model_output": [{"sum_logits": -1.797919511795044, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.797919511795044, "logits_per_char": -0.898959755897522, "bits_per_byte": 1.2969247817930398, "num_chars": 2}, {"sum_logits": -1.6512463092803955, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.6512463092803955, "logits_per_char": -0.8256231546401978, "bits_per_byte": 1.1911224308433708, "num_chars": 2}, {"sum_logits": -1.1387436389923096, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.1387436389923096, "logits_per_char": -0.5693718194961548, "bits_per_byte": 0.8214299004096187, "num_chars": 2}, {"sum_logits": -1.2449314594268799, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.2449314594268799, "logits_per_char": -0.6224657297134399, "bits_per_byte": 0.8980282213815548, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 718, "native_id": "MCAS_1998_4_23", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4549281597137451, "logits_per_token_corr": -1.4549281597137451, "logits_per_char_corr": -0.7274640798568726, "bits_per_byte_corr": 1.0495088204351168}, "model_output": [{"sum_logits": -1.4549281597137451, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.4549281597137451, "logits_per_char": -0.7274640798568726, "bits_per_byte": 1.0495088204351168, "num_chars": 2}, {"sum_logits": -0.9673793315887451, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": true, "logits_per_token": -0.9673793315887451, "logits_per_char": -0.48368966579437256, "bits_per_byte": 0.6978166821712827, "num_chars": 2}, {"sum_logits": -1.4098269939422607, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.4098269939422607, "logits_per_char": -0.7049134969711304, "bits_per_byte": 1.0169752063366777, "num_chars": 2}, {"sum_logits": -2.067448377609253, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -2.067448377609253, "logits_per_char": -1.0337241888046265, "bits_per_byte": 1.4913487608364726, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 719, "native_id": "Mercury_7075023", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.895449161529541, "logits_per_token_corr": -1.895449161529541, "logits_per_char_corr": -0.9477245807647705, "bits_per_byte_corr": 1.3672775527988885}, "model_output": [{"sum_logits": -1.3642277717590332, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3642277717590332, "logits_per_char": -0.6821138858795166, "bits_per_byte": 0.984082320480586, "num_chars": 2}, {"sum_logits": -1.044477939605713, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.044477939605713, "logits_per_char": -0.5222389698028564, "bits_per_byte": 0.7534315718940834, "num_chars": 2}, {"sum_logits": -1.4587883949279785, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4587883949279785, "logits_per_char": -0.7293941974639893, "bits_per_byte": 1.0522933915352386, "num_chars": 2}, {"sum_logits": -1.895449161529541, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.895449161529541, "logits_per_char": -0.9477245807647705, "bits_per_byte": 1.3672775527988885, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 720, "native_id": "Mercury_SC_400182", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5900388956069946, "logits_per_token_corr": -1.5900388956069946, "logits_per_char_corr": -0.7950194478034973, "bits_per_byte_corr": 1.1469706147572116}, "model_output": [{"sum_logits": -1.253313660621643, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.253313660621643, "logits_per_char": -0.6266568303108215, "bits_per_byte": 0.9040747014292684, "num_chars": 2}, {"sum_logits": -1.5900388956069946, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.5900388956069946, "logits_per_char": -0.7950194478034973, "bits_per_byte": 1.1469706147572116, "num_chars": 2}, {"sum_logits": -1.1437822580337524, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.1437822580337524, "logits_per_char": -0.5718911290168762, "bits_per_byte": 0.8250644957616304, "num_chars": 2}, {"sum_logits": -1.775408387184143, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.775408387184143, "logits_per_char": -0.8877041935920715, "bits_per_byte": 1.280686437872538, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 721, "native_id": "Mercury_SC_400133", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1994681358337402, "logits_per_token_corr": -1.1994681358337402, "logits_per_char_corr": -0.5997340679168701, "bits_per_byte_corr": 0.8652333656364549}, "model_output": [{"sum_logits": -1.2226042747497559, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.2226042747497559, "logits_per_char": -0.6113021373748779, "bits_per_byte": 0.8819225620761938, "num_chars": 2}, {"sum_logits": -1.1994681358337402, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.1994681358337402, "logits_per_char": -0.5997340679168701, "bits_per_byte": 0.8652333656364549, "num_chars": 2}, {"sum_logits": -1.48225736618042, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.48225736618042, "logits_per_char": -0.74112868309021, "bits_per_byte": 1.0692226757555823, "num_chars": 2}, {"sum_logits": -1.8090720176696777, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.8090720176696777, "logits_per_char": -0.9045360088348389, "bits_per_byte": 1.3049696142524552, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 722, "native_id": "MSA_2013_5_11", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8091468811035156, "logits_per_token_corr": -1.8091468811035156, "logits_per_char_corr": -0.9045734405517578, "bits_per_byte_corr": 1.3050236168048261}, "model_output": [{"sum_logits": -1.342864990234375, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.342864990234375, "logits_per_char": -0.6714324951171875, "bits_per_byte": 0.9686723309979655, "num_chars": 2}, {"sum_logits": -1.2818374633789062, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.2818374633789062, "logits_per_char": -0.6409187316894531, "bits_per_byte": 0.9246502758218824, "num_chars": 2}, {"sum_logits": -1.264678955078125, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.264678955078125, "logits_per_char": -0.6323394775390625, "bits_per_byte": 0.9122730284045789, "num_chars": 2}, {"sum_logits": -1.8091468811035156, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.8091468811035156, "logits_per_char": -0.9045734405517578, "bits_per_byte": 1.3050236168048261, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 723, "native_id": "Mercury_SC_408706", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2322131395339966, "logits_per_token_corr": -1.2322131395339966, "logits_per_char_corr": -0.6161065697669983, "bits_per_byte_corr": 0.8888538928625972}, "model_output": [{"sum_logits": -1.4586669206619263, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4586669206619263, "logits_per_char": -0.7293334603309631, "bits_per_byte": 1.052205766374624, "num_chars": 2}, {"sum_logits": -1.2322131395339966, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.2322131395339966, "logits_per_char": -0.6161065697669983, "bits_per_byte": 0.8888538928625972, "num_chars": 2}, {"sum_logits": -1.7047663927078247, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.7047663927078247, "logits_per_char": -0.8523831963539124, "bits_per_byte": 1.2297290103177565, "num_chars": 2}, {"sum_logits": -1.7768107652664185, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.7768107652664185, "logits_per_char": -0.8884053826332092, "bits_per_byte": 1.2816980398249138, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 724, "native_id": "Mercury_7213325", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6891305446624756, "logits_per_token_corr": -1.6891305446624756, "logits_per_char_corr": -0.8445652723312378, "bits_per_byte_corr": 1.218450130100189}, "model_output": [{"sum_logits": -1.3957555294036865, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3957555294036865, "logits_per_char": -0.6978777647018433, "bits_per_byte": 1.0068247902827474, "num_chars": 2}, {"sum_logits": -1.2033746242523193, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.2033746242523193, "logits_per_char": -0.6016873121261597, "bits_per_byte": 0.8680513013708441, "num_chars": 2}, {"sum_logits": -1.3750817775726318, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3750817775726318, "logits_per_char": -0.6875408887863159, "bits_per_byte": 0.9919118306611209, "num_chars": 2}, {"sum_logits": -1.6891305446624756, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.6891305446624756, "logits_per_char": -0.8445652723312378, "bits_per_byte": 1.218450130100189, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 725, "native_id": "Mercury_SC_LBS10932", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0454161167144775, "logits_per_token_corr": -1.0454161167144775, "logits_per_char_corr": -0.5227080583572388, "bits_per_byte_corr": 0.7541083236252291}, "model_output": [{"sum_logits": -1.4137365818023682, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4137365818023682, "logits_per_char": -0.7068682909011841, "bits_per_byte": 1.0197953778455282, "num_chars": 2}, {"sum_logits": -1.0454161167144775, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.0454161167144775, "logits_per_char": -0.5227080583572388, "bits_per_byte": 0.7541083236252291, "num_chars": 2}, {"sum_logits": -1.4534075260162354, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4534075260162354, "logits_per_char": -0.7267037630081177, "bits_per_byte": 1.0484119150879132, "num_chars": 2}, {"sum_logits": -1.8583757877349854, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.8583757877349854, "logits_per_char": -0.9291878938674927, "bits_per_byte": 1.3405347665376552, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 726, "native_id": "Mercury_192220", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6909263134002686, "logits_per_token_corr": -1.6909263134002686, "logits_per_char_corr": -0.8454631567001343, "bits_per_byte_corr": 1.2197455034264886}, "model_output": [{"sum_logits": -1.3105151653289795, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.3105151653289795, "logits_per_char": -0.6552575826644897, "bits_per_byte": 0.9453368650156285, "num_chars": 2}, {"sum_logits": -1.3130061626434326, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3130061626434326, "logits_per_char": -0.6565030813217163, "bits_per_byte": 0.9471337397518445, "num_chars": 2}, {"sum_logits": -1.3471801280975342, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3471801280975342, "logits_per_char": -0.6735900640487671, "bits_per_byte": 0.9717850449959337, "num_chars": 2}, {"sum_logits": -1.6909263134002686, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.6909263134002686, "logits_per_char": -0.8454631567001343, "bits_per_byte": 1.2197455034264886, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 727, "native_id": "Mercury_SC_407247", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2766296863555908, "logits_per_token_corr": -1.2766296863555908, "logits_per_char_corr": -0.6383148431777954, "bits_per_byte_corr": 0.9208936587790835}, "model_output": [{"sum_logits": -1.4015305042266846, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.4015305042266846, "logits_per_char": -0.7007652521133423, "bits_per_byte": 1.0109905540519495, "num_chars": 2}, {"sum_logits": -1.3137562274932861, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.3137562274932861, "logits_per_char": -0.6568781137466431, "bits_per_byte": 0.9476747971714592, "num_chars": 2}, {"sum_logits": -1.2766296863555908, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": true, "logits_per_token": -1.2766296863555908, "logits_per_char": -0.6383148431777954, "bits_per_byte": 0.9208936587790835, "num_chars": 2}, {"sum_logits": -1.6788208484649658, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.6788208484649658, "logits_per_char": -0.8394104242324829, "bits_per_byte": 1.211013256311574, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 728, "native_id": "Mercury_7024798", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5939302444458008, "logits_per_token_corr": -1.5939302444458008, "logits_per_char_corr": -0.7969651222229004, "bits_per_byte_corr": 1.1497776295932711}, "model_output": [{"sum_logits": -1.5408849716186523, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.5408849716186523, "logits_per_char": -0.7704424858093262, "bits_per_byte": 1.111513553568079, "num_chars": 2}, {"sum_logits": -1.4389238357543945, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4389238357543945, "logits_per_char": -0.7194619178771973, "bits_per_byte": 1.0379641410306408, "num_chars": 2}, {"sum_logits": -1.1197004318237305, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.1197004318237305, "logits_per_char": -0.5598502159118652, "bits_per_byte": 0.8076931301372436, "num_chars": 2}, {"sum_logits": -1.5939302444458008, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.5939302444458008, "logits_per_char": -0.7969651222229004, "bits_per_byte": 1.1497776295932711, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 729, "native_id": "Mercury_7180810", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4426071643829346, "logits_per_token_corr": -1.4426071643829346, "logits_per_char_corr": -0.7213035821914673, "bits_per_byte_corr": 1.0406211010038222}, "model_output": [{"sum_logits": -1.3894989490509033, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.3894989490509033, "logits_per_char": -0.6947494745254517, "bits_per_byte": 1.0023116215588024, "num_chars": 2}, {"sum_logits": -0.9823047518730164, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": true, "logits_per_token": -0.9823047518730164, "logits_per_char": -0.4911523759365082, "bits_per_byte": 0.7085830970849413, "num_chars": 2}, {"sum_logits": -1.4426071643829346, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.4426071643829346, "logits_per_char": -0.7213035821914673, "bits_per_byte": 1.0406211010038222, "num_chars": 2}, {"sum_logits": -2.0669071674346924, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -2.0669071674346924, "logits_per_char": -1.0334535837173462, "bits_per_byte": 1.4909583602190137, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 730, "native_id": "Mercury_412780", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.9507561326026917, "logits_per_token_corr": -0.9507561326026917, "logits_per_char_corr": -0.4753780663013458, "bits_per_byte_corr": 0.6858255788008292}, "model_output": [{"sum_logits": -1.4369049072265625, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.4369049072265625, "logits_per_char": -0.7184524536132812, "bits_per_byte": 1.0365077919431336, "num_chars": 2}, {"sum_logits": -0.9507561326026917, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": true, "logits_per_token": -0.9507561326026917, "logits_per_char": -0.4753780663013458, "bits_per_byte": 0.6858255788008292, "num_chars": 2}, {"sum_logits": -1.3401985168457031, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.3401985168457031, "logits_per_char": -0.6700992584228516, "bits_per_byte": 0.9667488770307144, "num_chars": 2}, {"sum_logits": -2.2788848876953125, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -2.2788848876953125, "logits_per_char": -1.1394424438476562, "bits_per_byte": 1.6438679631185957, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 731, "native_id": "LEAP_2011_8_10434", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.9187057018280029, "logits_per_token_corr": -0.9187057018280029, "logits_per_char_corr": -0.45935285091400146, "bits_per_byte_corr": 0.6627060800323133}, "model_output": [{"sum_logits": -1.4615790843963623, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.4615790843963623, "logits_per_char": -0.7307895421981812, "bits_per_byte": 1.0543064484635893, "num_chars": 2}, {"sum_logits": -0.9187057018280029, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": true, "logits_per_token": -0.9187057018280029, "logits_per_char": -0.45935285091400146, "bits_per_byte": 0.6627060800323133, "num_chars": 2}, {"sum_logits": -1.4384791851043701, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.4384791851043701, "logits_per_char": -0.7192395925521851, "bits_per_byte": 1.0376433933867815, "num_chars": 2}, {"sum_logits": -2.150231122970581, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -2.150231122970581, "logits_per_char": -1.0751155614852905, "bits_per_byte": 1.5510638889384964, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 732, "native_id": "Mercury_7200340", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9318026304244995, "logits_per_token_corr": -1.9318026304244995, "logits_per_char_corr": -0.9659013152122498, "bits_per_byte_corr": 1.3935010374458414}, "model_output": [{"sum_logits": -1.3220404386520386, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.3220404386520386, "logits_per_char": -0.6610202193260193, "bits_per_byte": 0.9536505923496681, "num_chars": 2}, {"sum_logits": -1.3053644895553589, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.3053644895553589, "logits_per_char": -0.6526822447776794, "bits_per_byte": 0.9416214378177111, "num_chars": 2}, {"sum_logits": -1.1962088346481323, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.1962088346481323, "logits_per_char": -0.5981044173240662, "bits_per_byte": 0.8628822768078331, "num_chars": 2}, {"sum_logits": -1.9318026304244995, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.9318026304244995, "logits_per_char": -0.9659013152122498, "bits_per_byte": 1.3935010374458414, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 733, "native_id": "Mercury_7056525", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.424613118171692, "logits_per_token_corr": -1.424613118171692, "logits_per_char_corr": -0.712306559085846, "bits_per_byte_corr": 1.0276411403865697}, "model_output": [{"sum_logits": -1.1969596147537231, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.1969596147537231, "logits_per_char": -0.5984798073768616, "bits_per_byte": 0.8634238501754006, "num_chars": 2}, {"sum_logits": -1.1942225694656372, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.1942225694656372, "logits_per_char": -0.5971112847328186, "bits_per_byte": 0.8614494893434941, "num_chars": 2}, {"sum_logits": -1.424613118171692, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.424613118171692, "logits_per_char": -0.712306559085846, "bits_per_byte": 1.0276411403865697, "num_chars": 2}, {"sum_logits": -1.944539189338684, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.944539189338684, "logits_per_char": -0.972269594669342, "bits_per_byte": 1.40268852263759, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 734, "native_id": "Mercury_7085278", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9807467460632324, "logits_per_token_corr": -1.9807467460632324, "logits_per_char_corr": -0.9903733730316162, "bits_per_byte_corr": 1.4288067539022147}, "model_output": [{"sum_logits": -1.369737148284912, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.369737148284912, "logits_per_char": -0.684868574142456, "bits_per_byte": 0.9880564955767266, "num_chars": 2}, {"sum_logits": -1.1009840965270996, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.1009840965270996, "logits_per_char": -0.5504920482635498, "bits_per_byte": 0.7941921480792018, "num_chars": 2}, {"sum_logits": -1.339097499847412, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.339097499847412, "logits_per_char": -0.669548749923706, "bits_per_byte": 0.9659546611490294, "num_chars": 2}, {"sum_logits": -1.9807467460632324, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.9807467460632324, "logits_per_char": -0.9903733730316162, "bits_per_byte": 1.4288067539022147, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 735, "native_id": "AKDE&ED_2008_4_35", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.570489525794983, "logits_per_token_corr": -1.570489525794983, "logits_per_char_corr": -0.7852447628974915, "bits_per_byte_corr": 1.1328687253170546}, "model_output": [{"sum_logits": -1.4528995752334595, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4528995752334595, "logits_per_char": -0.7264497876167297, "bits_per_byte": 1.0480455060502496, "num_chars": 2}, {"sum_logits": -1.3727260828018188, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3727260828018188, "logits_per_char": -0.6863630414009094, "bits_per_byte": 0.9902125560792697, "num_chars": 2}, {"sum_logits": -1.232873558998108, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.232873558998108, "logits_per_char": -0.616436779499054, "bits_per_byte": 0.8893302848054875, "num_chars": 2}, {"sum_logits": -1.570489525794983, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.570489525794983, "logits_per_char": -0.7852447628974915, "bits_per_byte": 1.1328687253170546, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 736, "native_id": "MCAS_1999_8_16", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.3281090259552, "logits_per_token_corr": -2.3281090259552, "logits_per_char_corr": -1.1640545129776, "bits_per_byte_corr": 1.6793756731984077}, "model_output": [{"sum_logits": -1.4255268573760986, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.4255268573760986, "logits_per_char": -0.7127634286880493, "bits_per_byte": 1.028300263896002, "num_chars": 2}, {"sum_logits": -1.2222073078155518, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.2222073078155518, "logits_per_char": -0.6111036539077759, "bits_per_byte": 0.8816362109625071, "num_chars": 2}, {"sum_logits": -1.0396816730499268, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.0396816730499268, "logits_per_char": -0.5198408365249634, "bits_per_byte": 0.7499717969066738, "num_chars": 2}, {"sum_logits": -2.3281090259552, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -2.3281090259552, "logits_per_char": -1.1640545129776, "bits_per_byte": 1.6793756731984077, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 737, "native_id": "Mercury_SC_400063", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7826600074768066, "logits_per_token_corr": -1.7826600074768066, "logits_per_char_corr": -0.8913300037384033, "bits_per_byte_corr": 1.2859173761898595}, "model_output": [{"sum_logits": -1.2903409004211426, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.2903409004211426, "logits_per_char": -0.6451704502105713, "bits_per_byte": 0.9307842090475598, "num_chars": 2}, {"sum_logits": -1.11967134475708, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.11967134475708, "logits_per_char": -0.55983567237854, "bits_per_byte": 0.8076721482538384, "num_chars": 2}, {"sum_logits": -1.5576481819152832, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5576481819152832, "logits_per_char": -0.7788240909576416, "bits_per_byte": 1.1236056537502517, "num_chars": 2}, {"sum_logits": -1.7826600074768066, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.7826600074768066, "logits_per_char": -0.8913300037384033, "bits_per_byte": 1.2859173761898595, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 738, "native_id": "Mercury_SC_401666", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.3937697410583496, "logits_per_token_corr": -2.3937697410583496, "logits_per_char_corr": -1.1968848705291748, "bits_per_byte_corr": 1.72673986722871}, "model_output": [{"sum_logits": -0.9823737144470215, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -0.9823737144470215, "logits_per_char": -0.49118685722351074, "bits_per_byte": 0.7086328430667034, "num_chars": 2}, {"sum_logits": -1.1921420097351074, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.1921420097351074, "logits_per_char": -0.5960710048675537, "bits_per_byte": 0.8599486827407388, "num_chars": 2}, {"sum_logits": -1.5417609214782715, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.5417609214782715, "logits_per_char": -0.7708804607391357, "bits_per_byte": 1.1121454178273493, "num_chars": 2}, {"sum_logits": -2.3937697410583496, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -2.3937697410583496, "logits_per_char": -1.1968848705291748, "bits_per_byte": 1.72673986722871, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 739, "native_id": "TIMSS_2011_8_pg31", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.365419626235962, "logits_per_token_corr": -1.365419626235962, "logits_per_char_corr": -0.682709813117981, "bits_per_byte_corr": 0.9849420617522497}, "model_output": [{"sum_logits": -1.2504980564117432, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.2504980564117432, "logits_per_char": -0.6252490282058716, "bits_per_byte": 0.9020436723139026, "num_chars": 2}, {"sum_logits": -1.1306555271148682, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.1306555271148682, "logits_per_char": -0.5653277635574341, "bits_per_byte": 0.8155955609617446, "num_chars": 2}, {"sum_logits": -1.365419626235962, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.365419626235962, "logits_per_char": -0.682709813117981, "bits_per_byte": 0.9849420617522497, "num_chars": 2}, {"sum_logits": -2.093975305557251, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -2.093975305557251, "logits_per_char": -1.0469876527786255, "bits_per_byte": 1.510483894536784, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 740, "native_id": "Mercury_412673", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3647270202636719, "logits_per_token_corr": -1.3647270202636719, "logits_per_char_corr": -0.6823635101318359, "bits_per_byte_corr": 0.984442452151493}, "model_output": [{"sum_logits": -1.53411865234375, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.53411865234375, "logits_per_char": -0.767059326171875, "bits_per_byte": 1.106632685936589, "num_chars": 2}, {"sum_logits": -1.3647270202636719, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3647270202636719, "logits_per_char": -0.6823635101318359, "bits_per_byte": 0.984442452151493, "num_chars": 2}, {"sum_logits": -1.2223434448242188, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.2223434448242188, "logits_per_char": -0.6111717224121094, "bits_per_byte": 0.8817344130561497, "num_chars": 2}, {"sum_logits": -1.5157089233398438, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.5157089233398438, "logits_per_char": -0.7578544616699219, "bits_per_byte": 1.0933528735675568, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 741, "native_id": "Mercury_7130655", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.1323635578155518, "logits_per_token_corr": -2.1323635578155518, "logits_per_char_corr": -1.0661817789077759, "bits_per_byte_corr": 1.5381751651175266}, "model_output": [{"sum_logits": -1.2820789813995361, "num_tokens": 1, "num_tokens_all": 424, "is_greedy": false, "logits_per_token": -1.2820789813995361, "logits_per_char": -0.6410394906997681, "bits_per_byte": 0.9248244942472066, "num_chars": 2}, {"sum_logits": -0.9834682941436768, "num_tokens": 1, "num_tokens_all": 424, "is_greedy": true, "logits_per_token": -0.9834682941436768, "logits_per_char": -0.4917341470718384, "bits_per_byte": 0.709422415416815, "num_chars": 2}, {"sum_logits": -1.511775255203247, "num_tokens": 1, "num_tokens_all": 424, "is_greedy": false, "logits_per_token": -1.511775255203247, "logits_per_char": -0.7558876276016235, "bits_per_byte": 1.0905153318109693, "num_chars": 2}, {"sum_logits": -2.1323635578155518, "num_tokens": 1, "num_tokens_all": 424, "is_greedy": false, "logits_per_token": -2.1323635578155518, "logits_per_char": -1.0661817789077759, "bits_per_byte": 1.5381751651175266, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 742, "native_id": "MCAS_2004_5_7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1685419082641602, "logits_per_token_corr": -1.1685419082641602, "logits_per_char_corr": -0.5842709541320801, "bits_per_byte_corr": 0.8429248080624205}, "model_output": [{"sum_logits": -1.6521215438842773, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.6521215438842773, "logits_per_char": -0.8260607719421387, "bits_per_byte": 1.1917537791546886, "num_chars": 2}, {"sum_logits": -1.1685419082641602, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.1685419082641602, "logits_per_char": -0.5842709541320801, "bits_per_byte": 0.8429248080624205, "num_chars": 2}, {"sum_logits": -1.418910026550293, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.418910026550293, "logits_per_char": -0.7094550132751465, "bits_per_byte": 1.023527229386603, "num_chars": 2}, {"sum_logits": -1.5006647109985352, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.5006647109985352, "logits_per_char": -0.7503323554992676, "bits_per_byte": 1.0825007682981058, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 743, "native_id": "Mercury_7187373", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6695419549942017, "logits_per_token_corr": -1.6695419549942017, "logits_per_char_corr": -0.8347709774971008, "bits_per_byte_corr": 1.2043199495139651}, "model_output": [{"sum_logits": -1.5771538019180298, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.5771538019180298, "logits_per_char": -0.7885769009590149, "bits_per_byte": 1.1376759843739754, "num_chars": 2}, {"sum_logits": -1.2067981958389282, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.2067981958389282, "logits_per_char": -0.6033990979194641, "bits_per_byte": 0.8705208862459103, "num_chars": 2}, {"sum_logits": -1.2319103479385376, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.2319103479385376, "logits_per_char": -0.6159551739692688, "bits_per_byte": 0.8886354748960013, "num_chars": 2}, {"sum_logits": -1.6695419549942017, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.6695419549942017, "logits_per_char": -0.8347709774971008, "bits_per_byte": 1.2043199495139651, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 744, "native_id": "Mercury_SC_401361", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.46268630027771, "logits_per_token_corr": -1.46268630027771, "logits_per_char_corr": -0.731343150138855, "bits_per_byte_corr": 1.0551051358941967}, "model_output": [{"sum_logits": -1.5833985805511475, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.5833985805511475, "logits_per_char": -0.7916992902755737, "bits_per_byte": 1.1421806399567027, "num_chars": 2}, {"sum_logits": -1.1705052852630615, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.1705052852630615, "logits_per_char": -0.5852526426315308, "bits_per_byte": 0.8443410851922768, "num_chars": 2}, {"sum_logits": -1.46268630027771, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.46268630027771, "logits_per_char": -0.731343150138855, "bits_per_byte": 1.0551051358941967, "num_chars": 2}, {"sum_logits": -1.4331223964691162, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4331223964691162, "logits_per_char": -0.7165611982345581, "bits_per_byte": 1.0337792871871931, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 745, "native_id": "MCAS_2006_8_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.840634822845459, "logits_per_token_corr": -1.840634822845459, "logits_per_char_corr": -0.9203174114227295, "bits_per_byte_corr": 1.3277373655042937}, "model_output": [{"sum_logits": -1.4574332237243652, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4574332237243652, "logits_per_char": -0.7287166118621826, "bits_per_byte": 1.0513158421477338, "num_chars": 2}, {"sum_logits": -1.1347460746765137, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.1347460746765137, "logits_per_char": -0.5673730373382568, "bits_per_byte": 0.8185462673025998, "num_chars": 2}, {"sum_logits": -1.303708553314209, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.303708553314209, "logits_per_char": -0.6518542766571045, "bits_per_byte": 0.9404269323161427, "num_chars": 2}, {"sum_logits": -1.840634822845459, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.840634822845459, "logits_per_char": -0.9203174114227295, "bits_per_byte": 1.3277373655042937, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 746, "native_id": "Mercury_7233765", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.346314787864685, "logits_per_token_corr": -1.346314787864685, "logits_per_char_corr": -0.6731573939323425, "bits_per_byte_corr": 0.9711608339646267}, "model_output": [{"sum_logits": -1.4479001760482788, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4479001760482788, "logits_per_char": -0.7239500880241394, "bits_per_byte": 1.0444392018443047, "num_chars": 2}, {"sum_logits": -1.346314787864685, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.346314787864685, "logits_per_char": -0.6731573939323425, "bits_per_byte": 0.9711608339646267, "num_chars": 2}, {"sum_logits": -1.4210256338119507, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4210256338119507, "logits_per_char": -0.7105128169059753, "bits_per_byte": 1.025053317439035, "num_chars": 2}, {"sum_logits": -1.4499067068099976, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4499067068099976, "logits_per_char": -0.7249533534049988, "bits_per_byte": 1.0458866078339673, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 747, "native_id": "Mercury_SC_407613", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5114424228668213, "logits_per_token_corr": -1.5114424228668213, "logits_per_char_corr": -0.7557212114334106, "bits_per_byte_corr": 1.0902752440303647}, "model_output": [{"sum_logits": -1.0531351566314697, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.0531351566314697, "logits_per_char": -0.5265675783157349, "bits_per_byte": 0.7596764339295673, "num_chars": 2}, {"sum_logits": -1.1652319431304932, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.1652319431304932, "logits_per_char": -0.5826159715652466, "bits_per_byte": 0.8405371729204905, "num_chars": 2}, {"sum_logits": -1.5114424228668213, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.5114424228668213, "logits_per_char": -0.7557212114334106, "bits_per_byte": 1.0902752440303647, "num_chars": 2}, {"sum_logits": -2.2497732639312744, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -2.2497732639312744, "logits_per_char": -1.1248866319656372, "bits_per_byte": 1.6228683655002791, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 748, "native_id": "MCAS_2005_5_24", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3156932592391968, "logits_per_token_corr": -1.3156932592391968, "logits_per_char_corr": -0.6578466296195984, "bits_per_byte_corr": 0.9490720702183951}, "model_output": [{"sum_logits": -1.3156932592391968, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3156932592391968, "logits_per_char": -0.6578466296195984, "bits_per_byte": 0.9490720702183951, "num_chars": 2}, {"sum_logits": -1.097561240196228, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.097561240196228, "logits_per_char": -0.548780620098114, "bits_per_byte": 0.7917230791520881, "num_chars": 2}, {"sum_logits": -1.4811328649520874, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4811328649520874, "logits_per_char": -0.7405664324760437, "bits_per_byte": 1.0684115195827872, "num_chars": 2}, {"sum_logits": -1.8780673742294312, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.8780673742294312, "logits_per_char": -0.9390336871147156, "bits_per_byte": 1.354739243629052, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 749, "native_id": "Mercury_405778", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.347910761833191, "logits_per_token_corr": -1.347910761833191, "logits_per_char_corr": -0.6739553809165955, "bits_per_byte_corr": 0.9723120858295031}, "model_output": [{"sum_logits": -1.5210198163986206, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.5210198163986206, "logits_per_char": -0.7605099081993103, "bits_per_byte": 1.097183873106854, "num_chars": 2}, {"sum_logits": -1.4150608777999878, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4150608777999878, "logits_per_char": -0.7075304388999939, "bits_per_byte": 1.0207506554797463, "num_chars": 2}, {"sum_logits": -1.347910761833191, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.347910761833191, "logits_per_char": -0.6739553809165955, "bits_per_byte": 0.9723120858295031, "num_chars": 2}, {"sum_logits": -1.35435950756073, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.35435950756073, "logits_per_char": -0.677179753780365, "bits_per_byte": 0.9769638725700437, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 750, "native_id": "Mercury_7263060", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3532589673995972, "logits_per_token_corr": -1.3532589673995972, "logits_per_char_corr": -0.6766294836997986, "bits_per_byte_corr": 0.9761700006536604}, "model_output": [{"sum_logits": -1.6108635663986206, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.6108635663986206, "logits_per_char": -0.8054317831993103, "bits_per_byte": 1.1619924393968346, "num_chars": 2}, {"sum_logits": -1.3157070875167847, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.3157070875167847, "logits_per_char": -0.6578535437583923, "bits_per_byte": 0.9490820452121451, "num_chars": 2}, {"sum_logits": -1.4081429243087769, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4081429243087769, "logits_per_char": -0.7040714621543884, "bits_per_byte": 1.0157604068823074, "num_chars": 2}, {"sum_logits": -1.3532589673995972, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3532589673995972, "logits_per_char": -0.6766294836997986, "bits_per_byte": 0.9761700006536604, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 751, "native_id": "Mercury_SC_401668", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7992502450942993, "logits_per_token_corr": -1.7992502450942993, "logits_per_char_corr": -0.8996251225471497, "bits_per_byte_corr": 1.2978847029588312}, "model_output": [{"sum_logits": -1.3539682626724243, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.3539682626724243, "logits_per_char": -0.6769841313362122, "bits_per_byte": 0.9766816490399777, "num_chars": 2}, {"sum_logits": -1.2008711099624634, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.2008711099624634, "logits_per_char": -0.6004355549812317, "bits_per_byte": 0.8662453975454578, "num_chars": 2}, {"sum_logits": -1.3293997049331665, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.3293997049331665, "logits_per_char": -0.6646998524665833, "bits_per_byte": 0.9589591808338542, "num_chars": 2}, {"sum_logits": -1.7992502450942993, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.7992502450942993, "logits_per_char": -0.8996251225471497, "bits_per_byte": 1.2978847029588312, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 752, "native_id": "Mercury_7230388", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5365500450134277, "logits_per_token_corr": -1.5365500450134277, "logits_per_char_corr": -0.7682750225067139, "bits_per_byte_corr": 1.1083865650100893}, "model_output": [{"sum_logits": -1.4978632926940918, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4978632926940918, "logits_per_char": -0.7489316463470459, "bits_per_byte": 1.0804799721504663, "num_chars": 2}, {"sum_logits": -1.2442679405212402, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.2442679405212402, "logits_per_char": -0.6221339702606201, "bits_per_byte": 0.8975495936642033, "num_chars": 2}, {"sum_logits": -1.350114345550537, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.350114345550537, "logits_per_char": -0.6750571727752686, "bits_per_byte": 0.9739016354801038, "num_chars": 2}, {"sum_logits": -1.5365500450134277, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.5365500450134277, "logits_per_char": -0.7682750225067139, "bits_per_byte": 1.1083865650100893, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 753, "native_id": "Mercury_7041650", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5492607355117798, "logits_per_token_corr": -1.5492607355117798, "logits_per_char_corr": -0.7746303677558899, "bits_per_byte_corr": 1.1175553900842192}, "model_output": [{"sum_logits": -1.6242233514785767, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.6242233514785767, "logits_per_char": -0.8121116757392883, "bits_per_byte": 1.171629487237939, "num_chars": 2}, {"sum_logits": -1.2904239892959595, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.2904239892959595, "logits_per_char": -0.6452119946479797, "bits_per_byte": 0.9308441450013856, "num_chars": 2}, {"sum_logits": -1.2003647089004517, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.2003647089004517, "logits_per_char": -0.6001823544502258, "bits_per_byte": 0.865880106395025, "num_chars": 2}, {"sum_logits": -1.5492607355117798, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.5492607355117798, "logits_per_char": -0.7746303677558899, "bits_per_byte": 1.1175553900842192, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 754, "native_id": "Mercury_SC_409009", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2095915079116821, "logits_per_token_corr": -1.2095915079116821, "logits_per_char_corr": -0.6047957539558411, "bits_per_byte_corr": 0.8725358349834205}, "model_output": [{"sum_logits": -1.2095915079116821, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.2095915079116821, "logits_per_char": -0.6047957539558411, "bits_per_byte": 0.8725358349834205, "num_chars": 2}, {"sum_logits": -0.9887357950210571, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -0.9887357950210571, "logits_per_char": -0.49436789751052856, "bits_per_byte": 0.7132221141136553, "num_chars": 2}, {"sum_logits": -1.5633798837661743, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.5633798837661743, "logits_per_char": -0.7816899418830872, "bits_per_byte": 1.127740202668322, "num_chars": 2}, {"sum_logits": -2.19285249710083, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -2.19285249710083, "logits_per_char": -1.096426248550415, "bits_per_byte": 1.58180871148531, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 755, "native_id": "Mercury_7223143", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.2578988075256348, "logits_per_token_corr": -2.2578988075256348, "logits_per_char_corr": -1.1289494037628174, "bits_per_byte_corr": 1.6287297062243389}, "model_output": [{"sum_logits": -1.1413253545761108, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.1413253545761108, "logits_per_char": -0.5706626772880554, "bits_per_byte": 0.823292214544488, "num_chars": 2}, {"sum_logits": -1.0953162908554077, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.0953162908554077, "logits_per_char": -0.5476581454277039, "bits_per_byte": 0.7901036905115627, "num_chars": 2}, {"sum_logits": -1.4906657934188843, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4906657934188843, "logits_per_char": -0.7453328967094421, "bits_per_byte": 1.0752880738948907, "num_chars": 2}, {"sum_logits": -2.2578988075256348, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -2.2578988075256348, "logits_per_char": -1.1289494037628174, "bits_per_byte": 1.6287297062243389, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 756, "native_id": "ACTAAP_2007_7_3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.3154349327087402, "logits_per_token_corr": -2.3154349327087402, "logits_per_char_corr": -1.1577174663543701, "bits_per_byte_corr": 1.6702332474611852}, "model_output": [{"sum_logits": -1.345145583152771, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.345145583152771, "logits_per_char": -0.6725727915763855, "bits_per_byte": 0.9703174310447948, "num_chars": 2}, {"sum_logits": -0.9722722768783569, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -0.9722722768783569, "logits_per_char": -0.48613613843917847, "bits_per_byte": 0.7013461961236173, "num_chars": 2}, {"sum_logits": -1.373456358909607, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.373456358909607, "logits_per_char": -0.6867281794548035, "bits_per_byte": 0.9907393389388629, "num_chars": 2}, {"sum_logits": -2.3154349327087402, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -2.3154349327087402, "logits_per_char": -1.1577174663543701, "bits_per_byte": 1.6702332474611852, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 757, "native_id": "Mercury_7215670", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3115854263305664, "logits_per_token_corr": -1.3115854263305664, "logits_per_char_corr": -0.6557927131652832, "bits_per_byte_corr": 0.9461088951353522}, "model_output": [{"sum_logits": -1.3212957382202148, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.3212957382202148, "logits_per_char": -0.6606478691101074, "bits_per_byte": 0.9531134045396977, "num_chars": 2}, {"sum_logits": -1.2073659896850586, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": true, "logits_per_token": -1.2073659896850586, "logits_per_char": -0.6036829948425293, "bits_per_byte": 0.8709304629289404, "num_chars": 2}, {"sum_logits": -1.3115854263305664, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.3115854263305664, "logits_per_char": -0.6557927131652832, "bits_per_byte": 0.9461088951353522, "num_chars": 2}, {"sum_logits": -1.892256736755371, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.892256736755371, "logits_per_char": -0.9461283683776855, "bits_per_byte": 1.364974705103834, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 758, "native_id": "MEA_2010_8_15", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.441544532775879, "logits_per_token_corr": -1.441544532775879, "logits_per_char_corr": -0.7207722663879395, "bits_per_byte_corr": 1.0398545743289263}, "model_output": [{"sum_logits": -1.441544532775879, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.441544532775879, "logits_per_char": -0.7207722663879395, "bits_per_byte": 1.0398545743289263, "num_chars": 2}, {"sum_logits": -1.1142091751098633, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.1142091751098633, "logits_per_char": -0.5571045875549316, "bits_per_byte": 0.8037320257225686, "num_chars": 2}, {"sum_logits": -1.313368797302246, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.313368797302246, "logits_per_char": -0.656684398651123, "bits_per_byte": 0.947395325363807, "num_chars": 2}, {"sum_logits": -1.8461236953735352, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.8461236953735352, "logits_per_char": -0.9230618476867676, "bits_per_byte": 1.33169675009246, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 759, "native_id": "Mercury_7270515", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2091903686523438, "logits_per_token_corr": -1.2091903686523438, "logits_per_char_corr": -0.6045951843261719, "bits_per_byte_corr": 0.8722464741733436}, "model_output": [{"sum_logits": -1.167776107788086, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.167776107788086, "logits_per_char": -0.583888053894043, "bits_per_byte": 0.8423723997878488, "num_chars": 2}, {"sum_logits": -1.2091903686523438, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.2091903686523438, "logits_per_char": -0.6045951843261719, "bits_per_byte": 0.8722464741733436, "num_chars": 2}, {"sum_logits": -1.3367843627929688, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3367843627929688, "logits_per_char": -0.6683921813964844, "bits_per_byte": 0.9642860854703572, "num_chars": 2}, {"sum_logits": -2.1998214721679688, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -2.1998214721679688, "logits_per_char": -1.0999107360839844, "bits_per_byte": 1.5868357643700337, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 760, "native_id": "Mercury_7006160", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4561618566513062, "logits_per_token_corr": -1.4561618566513062, "logits_per_char_corr": -0.7280809283256531, "bits_per_byte_corr": 1.0503987446620071}, "model_output": [{"sum_logits": -1.3353255987167358, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3353255987167358, "logits_per_char": -0.6676627993583679, "bits_per_byte": 0.9632338096210524, "num_chars": 2}, {"sum_logits": -0.9705089330673218, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -0.9705089330673218, "logits_per_char": -0.4852544665336609, "bits_per_byte": 0.700074212437835, "num_chars": 2}, {"sum_logits": -1.4561618566513062, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4561618566513062, "logits_per_char": -0.7280809283256531, "bits_per_byte": 1.0503987446620071, "num_chars": 2}, {"sum_logits": -2.161594867706299, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -2.161594867706299, "logits_per_char": -1.0807974338531494, "bits_per_byte": 1.5592610980265764, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 761, "native_id": "Mercury_SC_410630", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4600237607955933, "logits_per_token_corr": -1.4600237607955933, "logits_per_char_corr": -0.7300118803977966, "bits_per_byte_corr": 1.0531845196406848}, "model_output": [{"sum_logits": -1.540042757987976, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.540042757987976, "logits_per_char": -0.770021378993988, "bits_per_byte": 1.1109060248539058, "num_chars": 2}, {"sum_logits": -1.107751727104187, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.107751727104187, "logits_per_char": -0.5538758635520935, "bits_per_byte": 0.7990739616152716, "num_chars": 2}, {"sum_logits": -1.4600237607955933, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4600237607955933, "logits_per_char": -0.7300118803977966, "bits_per_byte": 1.0531845196406848, "num_chars": 2}, {"sum_logits": -1.5734041929244995, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.5734041929244995, "logits_per_char": -0.7867020964622498, "bits_per_byte": 1.134971213223854, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 762, "native_id": "Mercury_7082320", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.314112663269043, "logits_per_token_corr": -1.314112663269043, "logits_per_char_corr": -0.6570563316345215, "bits_per_byte_corr": 0.9479319112344993}, "model_output": [{"sum_logits": -1.314112663269043, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.314112663269043, "logits_per_char": -0.6570563316345215, "bits_per_byte": 0.9479319112344993, "num_chars": 2}, {"sum_logits": -1.103318214416504, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.103318214416504, "logits_per_char": -0.551659107208252, "bits_per_byte": 0.7958758582311499, "num_chars": 2}, {"sum_logits": -1.426854133605957, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.426854133605957, "logits_per_char": -0.7134270668029785, "bits_per_byte": 1.0292576913133558, "num_chars": 2}, {"sum_logits": -1.935023307800293, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.935023307800293, "logits_per_char": -0.9675116539001465, "bits_per_byte": 1.3958242650850232, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 763, "native_id": "MEA_2013_8_1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4302293062210083, "logits_per_token_corr": -1.4302293062210083, "logits_per_char_corr": -0.7151146531105042, "bits_per_byte_corr": 1.0316923637102968}, "model_output": [{"sum_logits": -1.4302293062210083, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4302293062210083, "logits_per_char": -0.7151146531105042, "bits_per_byte": 1.0316923637102968, "num_chars": 2}, {"sum_logits": -1.0944024324417114, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.0944024324417114, "logits_per_char": -0.5472012162208557, "bits_per_byte": 0.7894444810108051, "num_chars": 2}, {"sum_logits": -1.4043465852737427, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4043465852737427, "logits_per_char": -0.7021732926368713, "bits_per_byte": 1.0130219271326169, "num_chars": 2}, {"sum_logits": -1.7610703706741333, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.7610703706741333, "logits_per_char": -0.8805351853370667, "bits_per_byte": 1.270343745214943, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 764, "native_id": "Mercury_7033845", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2918915748596191, "logits_per_token_corr": -1.2918915748596191, "logits_per_char_corr": -0.6459457874298096, "bits_per_byte_corr": 0.9319027842087723}, "model_output": [{"sum_logits": -1.4017281532287598, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4017281532287598, "logits_per_char": -0.7008640766143799, "bits_per_byte": 1.0111331276695148, "num_chars": 2}, {"sum_logits": -1.2918915748596191, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.2918915748596191, "logits_per_char": -0.6459457874298096, "bits_per_byte": 0.9319027842087723, "num_chars": 2}, {"sum_logits": -1.336296558380127, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.336296558380127, "logits_per_char": -0.6681482791900635, "bits_per_byte": 0.9639342089666917, "num_chars": 2}, {"sum_logits": -1.593895435333252, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.593895435333252, "logits_per_char": -0.796947717666626, "bits_per_byte": 1.149752520126245, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 765, "native_id": "Mercury_7221620", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6096138954162598, "logits_per_token_corr": -1.6096138954162598, "logits_per_char_corr": -0.8048069477081299, "bits_per_byte_corr": 1.1610909923323365}, "model_output": [{"sum_logits": -1.6096138954162598, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.6096138954162598, "logits_per_char": -0.8048069477081299, "bits_per_byte": 1.1610909923323365, "num_chars": 2}, {"sum_logits": -1.1487374305725098, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.1487374305725098, "logits_per_char": -0.5743687152862549, "bits_per_byte": 0.8286388971858402, "num_chars": 2}, {"sum_logits": -1.3700814247131348, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.3700814247131348, "logits_per_char": -0.6850407123565674, "bits_per_byte": 0.9883048385245726, "num_chars": 2}, {"sum_logits": -1.5287652015686035, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.5287652015686035, "logits_per_char": -0.7643826007843018, "bits_per_byte": 1.1027709874941127, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 766, "native_id": "LEAP__7_10352", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.226207733154297, "logits_per_token_corr": -2.226207733154297, "logits_per_char_corr": -1.1131038665771484, "bits_per_byte_corr": 1.6058694283063362}, "model_output": [{"sum_logits": -1.5121288299560547, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.5121288299560547, "logits_per_char": -0.7560644149780273, "bits_per_byte": 1.090770382082199, "num_chars": 2}, {"sum_logits": -0.8084259033203125, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": true, "logits_per_token": -0.8084259033203125, "logits_per_char": -0.40421295166015625, "bits_per_byte": 0.5831560208236166, "num_chars": 2}, {"sum_logits": -1.5227622985839844, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.5227622985839844, "logits_per_char": -0.7613811492919922, "bits_per_byte": 1.0984408083106858, "num_chars": 2}, {"sum_logits": -2.226207733154297, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -2.226207733154297, "logits_per_char": -1.1131038665771484, "bits_per_byte": 1.6058694283063362, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 767, "native_id": "Mercury_412605", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.412296175956726, "logits_per_token_corr": -1.412296175956726, "logits_per_char_corr": -0.706148087978363, "bits_per_byte_corr": 1.0187563446603396}, "model_output": [{"sum_logits": -1.4038676023483276, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.4038676023483276, "logits_per_char": -0.7019338011741638, "bits_per_byte": 1.0126764139870332, "num_chars": 2}, {"sum_logits": -1.1741503477096558, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": true, "logits_per_token": -1.1741503477096558, "logits_per_char": -0.5870751738548279, "bits_per_byte": 0.8469704419499947, "num_chars": 2}, {"sum_logits": -1.412296175956726, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.412296175956726, "logits_per_char": -0.706148087978363, "bits_per_byte": 1.0187563446603396, "num_chars": 2}, {"sum_logits": -1.7164191007614136, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.7164191007614136, "logits_per_char": -0.8582095503807068, "bits_per_byte": 1.2381346623786822, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 768, "native_id": "Mercury_416638", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.274066686630249, "logits_per_token_corr": -1.274066686630249, "logits_per_char_corr": -0.6370333433151245, "bits_per_byte_corr": 0.9190448452823069}, "model_output": [{"sum_logits": -1.274066686630249, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.274066686630249, "logits_per_char": -0.6370333433151245, "bits_per_byte": 0.9190448452823069, "num_chars": 2}, {"sum_logits": -1.159332036972046, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.159332036972046, "logits_per_char": -0.579666018486023, "bits_per_byte": 0.8362812902422363, "num_chars": 2}, {"sum_logits": -1.408944845199585, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.408944845199585, "logits_per_char": -0.7044724225997925, "bits_per_byte": 1.0163388705284848, "num_chars": 2}, {"sum_logits": -1.9189565181732178, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.9189565181732178, "logits_per_char": -0.9594782590866089, "bits_per_byte": 1.3842345262260212, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 769, "native_id": "MCAS_2011_8_17694", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4697436094284058, "logits_per_token_corr": -1.4697436094284058, "logits_per_char_corr": -0.7348718047142029, "bits_per_byte_corr": 1.060195908351065}, "model_output": [{"sum_logits": -1.4697436094284058, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4697436094284058, "logits_per_char": -0.7348718047142029, "bits_per_byte": 1.060195908351065, "num_chars": 2}, {"sum_logits": -1.2714766263961792, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.2714766263961792, "logits_per_char": -0.6357383131980896, "bits_per_byte": 0.9171765117546574, "num_chars": 2}, {"sum_logits": -1.3085097074508667, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3085097074508667, "logits_per_char": -0.6542548537254333, "bits_per_byte": 0.943890232947895, "num_chars": 2}, {"sum_logits": -1.5874212980270386, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.5874212980270386, "logits_per_char": -0.7937106490135193, "bits_per_byte": 1.1450824172333876, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 770, "native_id": "Mercury_SC_400012", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.811439871788025, "logits_per_token_corr": -1.811439871788025, "logits_per_char_corr": -0.9057199358940125, "bits_per_byte_corr": 1.3066776599495005}, "model_output": [{"sum_logits": -1.3800376653671265, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.3800376653671265, "logits_per_char": -0.6900188326835632, "bits_per_byte": 0.9954867480332833, "num_chars": 2}, {"sum_logits": -1.155727744102478, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.155727744102478, "logits_per_char": -0.577863872051239, "bits_per_byte": 0.8336813425178159, "num_chars": 2}, {"sum_logits": -1.343908667564392, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.343908667564392, "logits_per_char": -0.671954333782196, "bits_per_byte": 0.9694251850521179, "num_chars": 2}, {"sum_logits": -1.811439871788025, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.811439871788025, "logits_per_char": -0.9057199358940125, "bits_per_byte": 1.3066776599495005, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 771, "native_id": "Mercury_SC_413458", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.282502293586731, "logits_per_token_corr": -1.282502293586731, "logits_per_char_corr": -0.6412511467933655, "bits_per_byte_corr": 0.9251298494438137}, "model_output": [{"sum_logits": -1.3844081163406372, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3844081163406372, "logits_per_char": -0.6922040581703186, "bits_per_byte": 0.9986393620062517, "num_chars": 2}, {"sum_logits": -1.282502293586731, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.282502293586731, "logits_per_char": -0.6412511467933655, "bits_per_byte": 0.9251298494438137, "num_chars": 2}, {"sum_logits": -1.2516833543777466, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.2516833543777466, "logits_per_char": -0.6258416771888733, "bits_per_byte": 0.9028986840626677, "num_chars": 2}, {"sum_logits": -1.7679682970046997, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.7679682970046997, "logits_per_char": -0.8839841485023499, "bits_per_byte": 1.2753195472697094, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 772, "native_id": "Mercury_7139545", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2660853862762451, "logits_per_token_corr": -1.2660853862762451, "logits_per_char_corr": -0.6330426931381226, "bits_per_byte_corr": 0.9132875540620193}, "model_output": [{"sum_logits": -1.6409785747528076, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.6409785747528076, "logits_per_char": -0.8204892873764038, "bits_per_byte": 1.1837158260013079, "num_chars": 2}, {"sum_logits": -1.3898398876190186, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3898398876190186, "logits_per_char": -0.6949199438095093, "bits_per_byte": 1.0025575567495364, "num_chars": 2}, {"sum_logits": -1.2660853862762451, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.2660853862762451, "logits_per_char": -0.6330426931381226, "bits_per_byte": 0.9132875540620193, "num_chars": 2}, {"sum_logits": -1.3517043590545654, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3517043590545654, "logits_per_char": -0.6758521795272827, "bits_per_byte": 0.9750485877787087, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 773, "native_id": "NYSEDREGENTS_2015_4_5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1630762815475464, "logits_per_token_corr": -1.1630762815475464, "logits_per_char_corr": -0.5815381407737732, "bits_per_byte_corr": 0.8389821917827133}, "model_output": [{"sum_logits": -1.4991148710250854, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4991148710250854, "logits_per_char": -0.7495574355125427, "bits_per_byte": 1.0813827950761712, "num_chars": 2}, {"sum_logits": -1.1630762815475464, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.1630762815475464, "logits_per_char": -0.5815381407737732, "bits_per_byte": 0.8389821917827133, "num_chars": 2}, {"sum_logits": -1.3066729307174683, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3066729307174683, "logits_per_char": -0.6533364653587341, "bits_per_byte": 0.942565278605647, "num_chars": 2}, {"sum_logits": -1.7173861265182495, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.7173861265182495, "logits_per_char": -0.8586930632591248, "bits_per_byte": 1.2388322240105822, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 774, "native_id": "TIMSS_2003_8_pg16", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5294302701950073, "logits_per_token_corr": -1.5294302701950073, "logits_per_char_corr": -0.7647151350975037, "bits_per_byte_corr": 1.103250733098695}, "model_output": [{"sum_logits": -1.542240023612976, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.542240023612976, "logits_per_char": -0.771120011806488, "bits_per_byte": 1.1124910169642586, "num_chars": 2}, {"sum_logits": -1.2521227598190308, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.2521227598190308, "logits_per_char": -0.6260613799095154, "bits_per_byte": 0.9032156480882081, "num_chars": 2}, {"sum_logits": -1.5294302701950073, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.5294302701950073, "logits_per_char": -0.7647151350975037, "bits_per_byte": 1.103250733098695, "num_chars": 2}, {"sum_logits": -1.334976077079773, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.334976077079773, "logits_per_char": -0.6674880385398865, "bits_per_byte": 0.9629816830548873, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 775, "native_id": "Mercury_SC_415073", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.301106333732605, "logits_per_token_corr": -1.301106333732605, "logits_per_char_corr": -0.6505531668663025, "bits_per_byte_corr": 0.9385498276732992}, "model_output": [{"sum_logits": -1.301106333732605, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.301106333732605, "logits_per_char": -0.6505531668663025, "bits_per_byte": 0.9385498276732992, "num_chars": 2}, {"sum_logits": -1.1328781843185425, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.1328781843185425, "logits_per_char": -0.5664390921592712, "bits_per_byte": 0.8171988692244142, "num_chars": 2}, {"sum_logits": -1.4111851453781128, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4111851453781128, "logits_per_char": -0.7055925726890564, "bits_per_byte": 1.0179549055073185, "num_chars": 2}, {"sum_logits": -1.9108914136886597, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.9108914136886597, "logits_per_char": -0.9554457068443298, "bits_per_byte": 1.3784167831039553, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 776, "native_id": "Mercury_7012880", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4753074645996094, "logits_per_token_corr": -1.4753074645996094, "logits_per_char_corr": -0.7376537322998047, "bits_per_byte_corr": 1.0642093814829277}, "model_output": [{"sum_logits": -1.4753074645996094, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4753074645996094, "logits_per_char": -0.7376537322998047, "bits_per_byte": 1.0642093814829277, "num_chars": 2}, {"sum_logits": -1.1996994018554688, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.1996994018554688, "logits_per_char": -0.5998497009277344, "bits_per_byte": 0.8654001888077919, "num_chars": 2}, {"sum_logits": -1.3308391571044922, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.3308391571044922, "logits_per_char": -0.6654195785522461, "bits_per_byte": 0.9599975260884391, "num_chars": 2}, {"sum_logits": -1.6339950561523438, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.6339950561523438, "logits_per_char": -0.8169975280761719, "bits_per_byte": 1.1786782821748816, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 777, "native_id": "Mercury_191625", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6539535522460938, "logits_per_token_corr": -1.6539535522460938, "logits_per_char_corr": -0.8269767761230469, "bits_per_byte_corr": 1.1930752938439193}, "model_output": [{"sum_logits": -1.5630073547363281, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.5630073547363281, "logits_per_char": -0.7815036773681641, "bits_per_byte": 1.1274714797763488, "num_chars": 2}, {"sum_logits": -1.219970703125, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.219970703125, "logits_per_char": -0.6099853515625, "bits_per_byte": 0.8800228417147619, "num_chars": 2}, {"sum_logits": -1.2275619506835938, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.2275619506835938, "logits_per_char": -0.6137809753417969, "bits_per_byte": 0.8854987693182377, "num_chars": 2}, {"sum_logits": -1.6539535522460938, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.6539535522460938, "logits_per_char": -0.8269767761230469, "bits_per_byte": 1.1930752938439193, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 778, "native_id": "Mercury_SC_402985", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1799482107162476, "logits_per_token_corr": -1.1799482107162476, "logits_per_char_corr": -0.5899741053581238, "bits_per_byte_corr": 0.8511527160536795}, "model_output": [{"sum_logits": -1.5097631216049194, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.5097631216049194, "logits_per_char": -0.7548815608024597, "bits_per_byte": 1.0890638842290115, "num_chars": 2}, {"sum_logits": -1.1799482107162476, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.1799482107162476, "logits_per_char": -0.5899741053581238, "bits_per_byte": 0.8511527160536795, "num_chars": 2}, {"sum_logits": -1.2557519674301147, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.2557519674301147, "logits_per_char": -0.6278759837150574, "bits_per_byte": 0.9058335679996437, "num_chars": 2}, {"sum_logits": -1.7563680410385132, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.7563680410385132, "logits_per_char": -0.8781840205192566, "bits_per_byte": 1.2669517313919734, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 779, "native_id": "Mercury_7005425", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3980344533920288, "logits_per_token_corr": -1.3980344533920288, "logits_per_char_corr": -0.6990172266960144, "bits_per_byte_corr": 1.0084686864510208}, "model_output": [{"sum_logits": -1.3980344533920288, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.3980344533920288, "logits_per_char": -0.6990172266960144, "bits_per_byte": 1.0084686864510208, "num_chars": 2}, {"sum_logits": -1.2445443868637085, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": true, "logits_per_token": -1.2445443868637085, "logits_per_char": -0.6222721934318542, "bits_per_byte": 0.8977490075478789, "num_chars": 2}, {"sum_logits": -1.3888429403305054, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.3888429403305054, "logits_per_char": -0.6944214701652527, "bits_per_byte": 1.001838411294953, "num_chars": 2}, {"sum_logits": -1.6107877492904663, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.6107877492904663, "logits_per_char": -0.8053938746452332, "bits_per_byte": 1.16193774891386, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 780, "native_id": "MDSA_2013_8_40", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4993878602981567, "logits_per_token_corr": -1.4993878602981567, "logits_per_char_corr": -0.7496939301490784, "bits_per_byte_corr": 1.0815797152114093}, "model_output": [{"sum_logits": -1.2612344026565552, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.2612344026565552, "logits_per_char": -0.6306172013282776, "bits_per_byte": 0.9097883090562368, "num_chars": 2}, {"sum_logits": -0.9468632936477661, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": true, "logits_per_token": -0.9468632936477661, "logits_per_char": -0.47343164682388306, "bits_per_byte": 0.6830174890732019, "num_chars": 2}, {"sum_logits": -1.4993878602981567, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.4993878602981567, "logits_per_char": -0.7496939301490784, "bits_per_byte": 1.0815797152114093, "num_chars": 2}, {"sum_logits": -2.331878662109375, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -2.331878662109375, "logits_per_char": -1.1659393310546875, "bits_per_byte": 1.6820948908912015, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 781, "native_id": "Mercury_401684", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3148345947265625, "logits_per_token_corr": -1.3148345947265625, "logits_per_char_corr": -0.6574172973632812, "bits_per_byte_corr": 0.9484526747013122}, "model_output": [{"sum_logits": -1.4408454895019531, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4408454895019531, "logits_per_char": -0.7204227447509766, "bits_per_byte": 1.039350321196596, "num_chars": 2}, {"sum_logits": -1.3148345947265625, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.3148345947265625, "logits_per_char": -0.6574172973632812, "bits_per_byte": 0.9484526747013122, "num_chars": 2}, {"sum_logits": -1.162801742553711, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.162801742553711, "logits_per_char": -0.5814008712768555, "bits_per_byte": 0.8387841537602445, "num_chars": 2}, {"sum_logits": -1.776498794555664, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.776498794555664, "logits_per_char": -0.888249397277832, "bits_per_byte": 1.2814730005262596, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 782, "native_id": "NCEOGA_2013_5_17", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.098037600517273, "logits_per_token_corr": -1.098037600517273, "logits_per_char_corr": -0.5490188002586365, "bits_per_byte_corr": 0.7920667004885122}, "model_output": [{"sum_logits": -1.428989291191101, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.428989291191101, "logits_per_char": -0.7144946455955505, "bits_per_byte": 1.0307978819431587, "num_chars": 2}, {"sum_logits": -1.098037600517273, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.098037600517273, "logits_per_char": -0.5490188002586365, "bits_per_byte": 0.7920667004885122, "num_chars": 2}, {"sum_logits": -1.394058108329773, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.394058108329773, "logits_per_char": -0.6970290541648865, "bits_per_byte": 1.0056003597999288, "num_chars": 2}, {"sum_logits": -1.8124464750289917, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.8124464750289917, "logits_per_char": -0.9062232375144958, "bits_per_byte": 1.3074037707014436, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 783, "native_id": "Mercury_7116183", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2340433597564697, "logits_per_token_corr": -1.2340433597564697, "logits_per_char_corr": -0.6170216798782349, "bits_per_byte_corr": 0.8901741176819464}, "model_output": [{"sum_logits": -1.365764856338501, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.365764856338501, "logits_per_char": -0.6828824281692505, "bits_per_byte": 0.9851910926306993, "num_chars": 2}, {"sum_logits": -1.2340433597564697, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.2340433597564697, "logits_per_char": -0.6170216798782349, "bits_per_byte": 0.8901741176819464, "num_chars": 2}, {"sum_logits": -1.356294870376587, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.356294870376587, "logits_per_char": -0.6781474351882935, "bits_per_byte": 0.9783599417384236, "num_chars": 2}, {"sum_logits": -1.6779272556304932, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.6779272556304932, "logits_per_char": -0.8389636278152466, "bits_per_byte": 1.2103686653361398, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 784, "native_id": "Mercury_7106628", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6878582239151, "logits_per_token_corr": -1.6878582239151, "logits_per_char_corr": -0.84392911195755, "bits_per_byte_corr": 1.217532344683859}, "model_output": [{"sum_logits": -1.330896019935608, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.330896019935608, "logits_per_char": -0.665448009967804, "bits_per_byte": 0.96003854395067, "num_chars": 2}, {"sum_logits": -1.2698150873184204, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2698150873184204, "logits_per_char": -0.6349075436592102, "bits_per_byte": 0.9159779646607936, "num_chars": 2}, {"sum_logits": -1.3619667291641235, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3619667291641235, "logits_per_char": -0.6809833645820618, "bits_per_byte": 0.9824513230111273, "num_chars": 2}, {"sum_logits": -1.6878582239151, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.6878582239151, "logits_per_char": -0.84392911195755, "bits_per_byte": 1.217532344683859, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 785, "native_id": "Mercury_7203473", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.511453628540039, "logits_per_token_corr": -2.511453628540039, "logits_per_char_corr": -1.2557268142700195, "bits_per_byte_corr": 1.8116308476599552}, "model_output": [{"sum_logits": -1.0404491424560547, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.0404491424560547, "logits_per_char": -0.5202245712280273, "bits_per_byte": 0.7505254090598016, "num_chars": 2}, {"sum_logits": -1.112813949584961, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.112813949584961, "logits_per_char": -0.5564069747924805, "bits_per_byte": 0.8027255832497188, "num_chars": 2}, {"sum_logits": -1.487680435180664, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.487680435180664, "logits_per_char": -0.743840217590332, "bits_per_byte": 1.0731345931321106, "num_chars": 2}, {"sum_logits": -2.511453628540039, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -2.511453628540039, "logits_per_char": -1.2557268142700195, "bits_per_byte": 1.8116308476599552, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 786, "native_id": "Mercury_SC_416108", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3925118446350098, "logits_per_token_corr": -1.3925118446350098, "logits_per_char_corr": -0.6962559223175049, "bits_per_byte_corr": 1.0044849663177573}, "model_output": [{"sum_logits": -1.3761162757873535, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3761162757873535, "logits_per_char": -0.6880581378936768, "bits_per_byte": 0.9926580633832152, "num_chars": 2}, {"sum_logits": -1.2896409034729004, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2896409034729004, "logits_per_char": -0.6448204517364502, "bits_per_byte": 0.9302792679846262, "num_chars": 2}, {"sum_logits": -1.3925118446350098, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3925118446350098, "logits_per_char": -0.6962559223175049, "bits_per_byte": 1.0044849663177573, "num_chars": 2}, {"sum_logits": -1.5644211769104004, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5644211769104004, "logits_per_char": -0.7822105884552002, "bits_per_byte": 1.128491336895966, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 787, "native_id": "LEAP_2007_8_10418", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1556745767593384, "logits_per_token_corr": -1.1556745767593384, "logits_per_char_corr": -0.5778372883796692, "bits_per_byte_corr": 0.8336429903866736}, "model_output": [{"sum_logits": -1.5492712259292603, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.5492712259292603, "logits_per_char": -0.7746356129646301, "bits_per_byte": 1.1175629573208572, "num_chars": 2}, {"sum_logits": -1.1858774423599243, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.1858774423599243, "logits_per_char": -0.5929387211799622, "bits_per_byte": 0.8554297525979897, "num_chars": 2}, {"sum_logits": -1.1556745767593384, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": true, "logits_per_token": -1.1556745767593384, "logits_per_char": -0.5778372883796692, "bits_per_byte": 0.8336429903866736, "num_chars": 2}, {"sum_logits": -1.8654009103775024, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.8654009103775024, "logits_per_char": -0.9327004551887512, "bits_per_byte": 1.345602321336657, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 788, "native_id": "Mercury_7111178", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2734848260879517, "logits_per_token_corr": -1.2734848260879517, "logits_per_char_corr": -0.6367424130439758, "bits_per_byte_corr": 0.9186251216228759}, "model_output": [{"sum_logits": -1.623120903968811, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.623120903968811, "logits_per_char": -0.8115604519844055, "bits_per_byte": 1.1708342394603488, "num_chars": 2}, {"sum_logits": -1.0958858728408813, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": true, "logits_per_token": -1.0958858728408813, "logits_per_char": -0.5479429364204407, "bits_per_byte": 0.7905145570644743, "num_chars": 2}, {"sum_logits": -1.2734848260879517, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.2734848260879517, "logits_per_char": -0.6367424130439758, "bits_per_byte": 0.9186251216228759, "num_chars": 2}, {"sum_logits": -1.7724529504776, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.7724529504776, "logits_per_char": -0.8862264752388, "bits_per_byte": 1.278554540932441, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 789, "native_id": "Mercury_7203560", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4802299737930298, "logits_per_token_corr": -1.4802299737930298, "logits_per_char_corr": -0.7401149868965149, "bits_per_byte_corr": 1.0677602212839692}, "model_output": [{"sum_logits": -1.4802299737930298, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.4802299737930298, "logits_per_char": -0.7401149868965149, "bits_per_byte": 1.0677602212839692, "num_chars": 2}, {"sum_logits": -1.0663734674453735, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": true, "logits_per_token": -1.0663734674453735, "logits_per_char": -0.5331867337226868, "bits_per_byte": 0.769225856610057, "num_chars": 2}, {"sum_logits": -1.4158302545547485, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.4158302545547485, "logits_per_char": -0.7079151272773743, "bits_per_byte": 1.021305643494081, "num_chars": 2}, {"sum_logits": -1.7612148523330688, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.7612148523330688, "logits_per_char": -0.8806074261665344, "bits_per_byte": 1.270447966701366, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 790, "native_id": "ACTAAP_2013_7_2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1991386413574219, "logits_per_token_corr": -1.1991386413574219, "logits_per_char_corr": -0.5995693206787109, "bits_per_byte_corr": 0.8649956856129624}, "model_output": [{"sum_logits": -1.1991386413574219, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.1991386413574219, "logits_per_char": -0.5995693206787109, "bits_per_byte": 0.8649956856129624, "num_chars": 2}, {"sum_logits": -1.143789291381836, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": true, "logits_per_token": -1.143789291381836, "logits_per_char": -0.571894645690918, "bits_per_byte": 0.8250695692498309, "num_chars": 2}, {"sum_logits": -1.3921222686767578, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.3921222686767578, "logits_per_char": -0.6960611343383789, "bits_per_byte": 1.0042039466662473, "num_chars": 2}, {"sum_logits": -2.106304168701172, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -2.106304168701172, "logits_per_char": -1.053152084350586, "bits_per_byte": 1.5193772893955573, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 791, "native_id": "MCAS_2012_8_23640", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4192864894866943, "logits_per_token_corr": -1.4192864894866943, "logits_per_char_corr": -0.7096432447433472, "bits_per_byte_corr": 1.0237987899923155}, "model_output": [{"sum_logits": -1.4127957820892334, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4127957820892334, "logits_per_char": -0.7063978910446167, "bits_per_byte": 1.019116734305223, "num_chars": 2}, {"sum_logits": -1.2806928157806396, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.2806928157806396, "logits_per_char": -0.6403464078903198, "bits_per_byte": 0.9238245871150895, "num_chars": 2}, {"sum_logits": -1.519733190536499, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.519733190536499, "logits_per_char": -0.7598665952682495, "bits_per_byte": 1.0962557687314722, "num_chars": 2}, {"sum_logits": -1.4192864894866943, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4192864894866943, "logits_per_char": -0.7096432447433472, "bits_per_byte": 1.0237987899923155, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 792, "native_id": "Mercury_404272", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.135786533355713, "logits_per_token_corr": -1.135786533355713, "logits_per_char_corr": -0.5678932666778564, "bits_per_byte_corr": 0.8192967995909657}, "model_output": [{"sum_logits": -1.192610263824463, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.192610263824463, "logits_per_char": -0.5963051319122314, "bits_per_byte": 0.8602864566670335, "num_chars": 2}, {"sum_logits": -1.135786533355713, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.135786533355713, "logits_per_char": -0.5678932666778564, "bits_per_byte": 0.8192967995909657, "num_chars": 2}, {"sum_logits": -1.4778389930725098, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4778389930725098, "logits_per_char": -0.7389194965362549, "bits_per_byte": 1.0660354932697904, "num_chars": 2}, {"sum_logits": -1.9882988929748535, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.9882988929748535, "logits_per_char": -0.9941494464874268, "bits_per_byte": 1.4342544763509488, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 793, "native_id": "MCAS_2009_8_17", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3491599559783936, "logits_per_token_corr": -1.3491599559783936, "logits_per_char_corr": -0.6745799779891968, "bits_per_byte_corr": 0.9732131889286995}, "model_output": [{"sum_logits": -1.4352977275848389, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4352977275848389, "logits_per_char": -0.7176488637924194, "bits_per_byte": 1.0353484568936666, "num_chars": 2}, {"sum_logits": -1.3491599559783936, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3491599559783936, "logits_per_char": -0.6745799779891968, "bits_per_byte": 0.9732131889286995, "num_chars": 2}, {"sum_logits": -1.2919490337371826, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.2919490337371826, "logits_per_char": -0.6459745168685913, "bits_per_byte": 0.9319442320276302, "num_chars": 2}, {"sum_logits": -1.5479228496551514, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.5479228496551514, "logits_per_char": -0.7739614248275757, "bits_per_byte": 1.1165903094389018, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 794, "native_id": "AIMS_2008_4_5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2334610223770142, "logits_per_token_corr": -1.2334610223770142, "logits_per_char_corr": -0.6167305111885071, "bits_per_byte_corr": 0.8897540500572138}, "model_output": [{"sum_logits": -1.3993927240371704, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3993927240371704, "logits_per_char": -0.6996963620185852, "bits_per_byte": 1.009448471612987, "num_chars": 2}, {"sum_logits": -1.2084709405899048, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.2084709405899048, "logits_per_char": -0.6042354702949524, "bits_per_byte": 0.8717275165243646, "num_chars": 2}, {"sum_logits": -1.2334610223770142, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.2334610223770142, "logits_per_char": -0.6167305111885071, "bits_per_byte": 0.8897540500572138, "num_chars": 2}, {"sum_logits": -1.93730890750885, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.93730890750885, "logits_per_char": -0.968654453754425, "bits_per_byte": 1.3974729767675207, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 795, "native_id": "Mercury_7236513", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8124057054519653, "logits_per_token_corr": -1.8124057054519653, "logits_per_char_corr": -0.9062028527259827, "bits_per_byte_corr": 1.3073743616681461}, "model_output": [{"sum_logits": -1.4202414751052856, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4202414751052856, "logits_per_char": -0.7101207375526428, "bits_per_byte": 1.0244876665003468, "num_chars": 2}, {"sum_logits": -1.1197692155838013, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.1197692155838013, "logits_per_char": -0.5598846077919006, "bits_per_byte": 0.8077427471320177, "num_chars": 2}, {"sum_logits": -1.4006434679031372, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4006434679031372, "logits_per_char": -0.7003217339515686, "bits_per_byte": 1.010350692599414, "num_chars": 2}, {"sum_logits": -1.8124057054519653, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.8124057054519653, "logits_per_char": -0.9062028527259827, "bits_per_byte": 1.3073743616681461, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 796, "native_id": "Mercury_SC_LBS10027", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1483277082443237, "logits_per_token_corr": -1.1483277082443237, "logits_per_char_corr": -0.5741638541221619, "bits_per_byte_corr": 0.8283433450003322}, "model_output": [{"sum_logits": -1.0382393598556519, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.0382393598556519, "logits_per_char": -0.5191196799278259, "bits_per_byte": 0.7489313878602785, "num_chars": 2}, {"sum_logits": -1.1483277082443237, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.1483277082443237, "logits_per_char": -0.5741638541221619, "bits_per_byte": 0.8283433450003322, "num_chars": 2}, {"sum_logits": -1.5374287366867065, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.5374287366867065, "logits_per_char": -0.7687143683433533, "bits_per_byte": 1.1090204070698444, "num_chars": 2}, {"sum_logits": -2.2640371322631836, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -2.2640371322631836, "logits_per_char": -1.1320185661315918, "bits_per_byte": 1.6331575715534559, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 797, "native_id": "Mercury_189053", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.31846022605896, "logits_per_token_corr": -2.31846022605896, "logits_per_char_corr": -1.15923011302948, "bits_per_byte_corr": 1.6724155353179848}, "model_output": [{"sum_logits": -0.9812772274017334, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -0.9812772274017334, "logits_per_char": -0.4906386137008667, "bits_per_byte": 0.7078418948553847, "num_chars": 2}, {"sum_logits": -1.1783864498138428, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.1783864498138428, "logits_per_char": -0.5891932249069214, "bits_per_byte": 0.8500261436992018, "num_chars": 2}, {"sum_logits": -1.5657384395599365, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.5657384395599365, "logits_per_char": -0.7828692197799683, "bits_per_byte": 1.1294415410419836, "num_chars": 2}, {"sum_logits": -2.31846022605896, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -2.31846022605896, "logits_per_char": -1.15923011302948, "bits_per_byte": 1.6724155353179848, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 798, "native_id": "Mercury_SC_414271", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.326290249824524, "logits_per_token_corr": -1.326290249824524, "logits_per_char_corr": -0.663145124912262, "bits_per_byte_corr": 0.9567161831012999}, "model_output": [{"sum_logits": -1.3616029024124146, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.3616029024124146, "logits_per_char": -0.6808014512062073, "bits_per_byte": 0.9821888774859104, "num_chars": 2}, {"sum_logits": -1.094488263130188, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.094488263130188, "logits_per_char": -0.547244131565094, "bits_per_byte": 0.7895063947651157, "num_chars": 2}, {"sum_logits": -1.326290249824524, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.326290249824524, "logits_per_char": -0.663145124912262, "bits_per_byte": 0.9567161831012999, "num_chars": 2}, {"sum_logits": -2.015378952026367, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -2.015378952026367, "logits_per_char": -1.0076894760131836, "bits_per_byte": 1.4537886098012625, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 799, "native_id": "Mercury_408922", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.528809905052185, "logits_per_token_corr": -1.528809905052185, "logits_per_char_corr": -0.7644049525260925, "bits_per_byte_corr": 1.1028032342411496}, "model_output": [{"sum_logits": -1.2319139242172241, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.2319139242172241, "logits_per_char": -0.6159569621086121, "bits_per_byte": 0.8886380546357642, "num_chars": 2}, {"sum_logits": -1.0602067708969116, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": true, "logits_per_token": -1.0602067708969116, "logits_per_char": -0.5301033854484558, "bits_per_byte": 0.7647775253454874, "num_chars": 2}, {"sum_logits": -1.528809905052185, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.528809905052185, "logits_per_char": -0.7644049525260925, "bits_per_byte": 1.1028032342411496, "num_chars": 2}, {"sum_logits": -1.9958699941635132, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.9958699941635132, "logits_per_char": -0.9979349970817566, "bits_per_byte": 1.4397158714204268, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 800, "native_id": "Mercury_7264093", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3548320531845093, "logits_per_token_corr": -1.3548320531845093, "logits_per_char_corr": -0.6774160265922546, "bits_per_byte_corr": 0.977304742184054}, "model_output": [{"sum_logits": -1.525081992149353, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.525081992149353, "logits_per_char": -0.7625409960746765, "bits_per_byte": 1.1001141135122567, "num_chars": 2}, {"sum_logits": -1.3103488683700562, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.3103488683700562, "logits_per_char": -0.6551744341850281, "bits_per_byte": 0.9452169071166516, "num_chars": 2}, {"sum_logits": -1.3548320531845093, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3548320531845093, "logits_per_char": -0.6774160265922546, "bits_per_byte": 0.977304742184054, "num_chars": 2}, {"sum_logits": -1.6175159215927124, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.6175159215927124, "logits_per_char": -0.8087579607963562, "bits_per_byte": 1.166791099321212, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 801, "native_id": "Mercury_SC_LBS11009", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.520673155784607, "logits_per_token_corr": -1.520673155784607, "logits_per_char_corr": -0.7603365778923035, "bits_per_byte_corr": 1.0969338103324993}, "model_output": [{"sum_logits": -1.1648648977279663, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.1648648977279663, "logits_per_char": -0.5824324488639832, "bits_per_byte": 0.8402724056294869, "num_chars": 2}, {"sum_logits": -1.2473291158676147, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.2473291158676147, "logits_per_char": -0.6236645579338074, "bits_per_byte": 0.899757764909958, "num_chars": 2}, {"sum_logits": -1.520673155784607, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.520673155784607, "logits_per_char": -0.7603365778923035, "bits_per_byte": 1.0969338103324993, "num_chars": 2}, {"sum_logits": -1.8622716665267944, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.8622716665267944, "logits_per_char": -0.9311358332633972, "bits_per_byte": 1.343345049044081, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 802, "native_id": "Mercury_7191433", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1077618598937988, "logits_per_token_corr": -1.1077618598937988, "logits_per_char_corr": -0.5538809299468994, "bits_per_byte_corr": 0.7990812708779332}, "model_output": [{"sum_logits": -1.5520997047424316, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.5520997047424316, "logits_per_char": -0.7760498523712158, "bits_per_byte": 1.1196032734993695, "num_chars": 2}, {"sum_logits": -1.286512851715088, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.286512851715088, "logits_per_char": -0.643256425857544, "bits_per_byte": 0.9280228556053045, "num_chars": 2}, {"sum_logits": -1.1077618598937988, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.1077618598937988, "logits_per_char": -0.5538809299468994, "bits_per_byte": 0.7990812708779332, "num_chars": 2}, {"sum_logits": -1.7705445289611816, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.7705445289611816, "logits_per_char": -0.8852722644805908, "bits_per_byte": 1.2771779058036088, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 803, "native_id": "MEAP_2005_5_14", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5577715635299683, "logits_per_token_corr": -1.5577715635299683, "logits_per_char_corr": -0.7788857817649841, "bits_per_byte_corr": 1.1236946547720734}, "model_output": [{"sum_logits": -1.6448153257369995, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.6448153257369995, "logits_per_char": -0.8224076628684998, "bits_per_byte": 1.1864834568103195, "num_chars": 2}, {"sum_logits": -1.216455340385437, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.216455340385437, "logits_per_char": -0.6082276701927185, "bits_per_byte": 0.8774870435191134, "num_chars": 2}, {"sum_logits": -1.2622507810592651, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.2622507810592651, "logits_per_char": -0.6311253905296326, "bits_per_byte": 0.9105214710968654, "num_chars": 2}, {"sum_logits": -1.5577715635299683, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.5577715635299683, "logits_per_char": -0.7788857817649841, "bits_per_byte": 1.1236946547720734, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 804, "native_id": "Mercury_416683", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4192399978637695, "logits_per_token_corr": -1.4192399978637695, "logits_per_char_corr": -0.7096199989318848, "bits_per_byte_corr": 1.0237652533753971}, "model_output": [{"sum_logits": -1.333439826965332, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.333439826965332, "logits_per_char": -0.666719913482666, "bits_per_byte": 0.9618735128440521, "num_chars": 2}, {"sum_logits": -1.0342035293579102, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.0342035293579102, "logits_per_char": -0.5171017646789551, "bits_per_byte": 0.7460201515377962, "num_chars": 2}, {"sum_logits": -1.4192399978637695, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.4192399978637695, "logits_per_char": -0.7096199989318848, "bits_per_byte": 1.0237652533753971, "num_chars": 2}, {"sum_logits": -2.0442380905151367, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -2.0442380905151367, "logits_per_char": -1.0221190452575684, "bits_per_byte": 1.4746060777923153, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 805, "native_id": "Mercury_7040775", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.265654444694519, "logits_per_token_corr": -1.265654444694519, "logits_per_char_corr": -0.6328272223472595, "bits_per_byte_corr": 0.9129766954205847}, "model_output": [{"sum_logits": -1.452238917350769, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.452238917350769, "logits_per_char": -0.7261194586753845, "bits_per_byte": 1.0475689421247085, "num_chars": 2}, {"sum_logits": -1.265654444694519, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.265654444694519, "logits_per_char": -0.6328272223472595, "bits_per_byte": 0.9129766954205847, "num_chars": 2}, {"sum_logits": -1.2412327527999878, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.2412327527999878, "logits_per_char": -0.6206163763999939, "bits_per_byte": 0.8953601685273928, "num_chars": 2}, {"sum_logits": -1.6960495710372925, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.6960495710372925, "logits_per_char": -0.8480247855186462, "bits_per_byte": 1.2234411526195568, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 806, "native_id": "Mercury_7222600", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.342023253440857, "logits_per_token_corr": -1.342023253440857, "logits_per_char_corr": -0.6710116267204285, "bits_per_byte_corr": 0.9680651462490939}, "model_output": [{"sum_logits": -1.3938592672348022, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3938592672348022, "logits_per_char": -0.6969296336174011, "bits_per_byte": 1.005456926269109, "num_chars": 2}, {"sum_logits": -1.165702223777771, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.165702223777771, "logits_per_char": -0.5828511118888855, "bits_per_byte": 0.8408764086993176, "num_chars": 2}, {"sum_logits": -1.342023253440857, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.342023253440857, "logits_per_char": -0.6710116267204285, "bits_per_byte": 0.9680651462490939, "num_chars": 2}, {"sum_logits": -1.8147481679916382, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.8147481679916382, "logits_per_char": -0.9073740839958191, "bits_per_byte": 1.3090640912128744, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 807, "native_id": "MCAS_2001_5_3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.524839997291565, "logits_per_token_corr": -1.524839997291565, "logits_per_char_corr": -0.7624199986457825, "bits_per_byte_corr": 1.0999395511216308}, "model_output": [{"sum_logits": -1.4738432168960571, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4738432168960571, "logits_per_char": -0.7369216084480286, "bits_per_byte": 1.063153150032653, "num_chars": 2}, {"sum_logits": -1.3681150674819946, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3681150674819946, "logits_per_char": -0.6840575337409973, "bits_per_byte": 0.9868864116115806, "num_chars": 2}, {"sum_logits": -1.2822881937026978, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.2822881937026978, "logits_per_char": -0.6411440968513489, "bits_per_byte": 0.9249754090233389, "num_chars": 2}, {"sum_logits": -1.524839997291565, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.524839997291565, "logits_per_char": -0.7624199986457825, "bits_per_byte": 1.0999395511216308, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 808, "native_id": "MCAS_2004_8_7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6414227485656738, "logits_per_token_corr": -1.6414227485656738, "logits_per_char_corr": -0.8207113742828369, "bits_per_byte_corr": 1.1840362296798654}, "model_output": [{"sum_logits": -1.5640950202941895, "num_tokens": 1, "num_tokens_all": 435, "is_greedy": false, "logits_per_token": -1.5640950202941895, "logits_per_char": -0.7820475101470947, "bits_per_byte": 1.1282560646295854, "num_chars": 2}, {"sum_logits": -1.2171711921691895, "num_tokens": 1, "num_tokens_all": 435, "is_greedy": true, "logits_per_token": -1.2171711921691895, "logits_per_char": -0.6085855960845947, "bits_per_byte": 0.8780034214283293, "num_chars": 2}, {"sum_logits": -1.2581658363342285, "num_tokens": 1, "num_tokens_all": 435, "is_greedy": false, "logits_per_token": -1.2581658363342285, "logits_per_char": -0.6290829181671143, "bits_per_byte": 0.9075748063483053, "num_chars": 2}, {"sum_logits": -1.6414227485656738, "num_tokens": 1, "num_tokens_all": 435, "is_greedy": false, "logits_per_token": -1.6414227485656738, "logits_per_char": -0.8207113742828369, "bits_per_byte": 1.1840362296798654, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 809, "native_id": "Mercury_415268", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.865637183189392, "logits_per_token_corr": -1.865637183189392, "logits_per_char_corr": -0.932818591594696, "bits_per_byte_corr": 1.345772756143662}, "model_output": [{"sum_logits": -1.3217757940292358, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.3217757940292358, "logits_per_char": -0.6608878970146179, "bits_per_byte": 0.9534596916072102, "num_chars": 2}, {"sum_logits": -1.0991405248641968, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.0991405248641968, "logits_per_char": -0.5495702624320984, "bits_per_byte": 0.7928622922314041, "num_chars": 2}, {"sum_logits": -1.5429633855819702, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.5429633855819702, "logits_per_char": -0.7714816927909851, "bits_per_byte": 1.1130128123269767, "num_chars": 2}, {"sum_logits": -1.865637183189392, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.865637183189392, "logits_per_char": -0.932818591594696, "bits_per_byte": 1.345772756143662, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 810, "native_id": "Mercury_7017710", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2559213638305664, "logits_per_token_corr": -1.2559213638305664, "logits_per_char_corr": -0.6279606819152832, "bits_per_byte_corr": 0.9059557616730817}, "model_output": [{"sum_logits": -1.2513971328735352, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2513971328735352, "logits_per_char": -0.6256985664367676, "bits_per_byte": 0.9026922188903067, "num_chars": 2}, {"sum_logits": -1.3277463912963867, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3277463912963867, "logits_per_char": -0.6638731956481934, "bits_per_byte": 0.9577665671414453, "num_chars": 2}, {"sum_logits": -1.2559213638305664, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.2559213638305664, "logits_per_char": -0.6279606819152832, "bits_per_byte": 0.9059557616730817, "num_chars": 2}, {"sum_logits": -1.8745088577270508, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.8745088577270508, "logits_per_char": -0.9372544288635254, "bits_per_byte": 1.3521723165735973, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 811, "native_id": "Mercury_7210123", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2088099718093872, "logits_per_token_corr": -1.2088099718093872, "logits_per_char_corr": -0.6044049859046936, "bits_per_byte_corr": 0.8719720758538917}, "model_output": [{"sum_logits": -1.2088099718093872, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.2088099718093872, "logits_per_char": -0.6044049859046936, "bits_per_byte": 0.8719720758538917, "num_chars": 2}, {"sum_logits": -1.25037682056427, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.25037682056427, "logits_per_char": -0.625188410282135, "bits_per_byte": 0.9019562191359388, "num_chars": 2}, {"sum_logits": -1.3130122423171997, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3130122423171997, "logits_per_char": -0.6565061211585999, "bits_per_byte": 0.9471381253094414, "num_chars": 2}, {"sum_logits": -2.04105281829834, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -2.04105281829834, "logits_per_char": -1.02052640914917, "bits_per_byte": 1.4723083895767866, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 812, "native_id": "MCAS_2009_5_6519", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1999012231826782, "logits_per_token_corr": -1.1999012231826782, "logits_per_char_corr": -0.5999506115913391, "bits_per_byte_corr": 0.8655457721217474}, "model_output": [{"sum_logits": -1.430799126625061, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.430799126625061, "logits_per_char": -0.7153995633125305, "bits_per_byte": 1.0321034022458593, "num_chars": 2}, {"sum_logits": -1.2109466791152954, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.2109466791152954, "logits_per_char": -0.6054733395576477, "bits_per_byte": 0.8735133843709253, "num_chars": 2}, {"sum_logits": -1.1999012231826782, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.1999012231826782, "logits_per_char": -0.5999506115913391, "bits_per_byte": 0.8655457721217474, "num_chars": 2}, {"sum_logits": -1.9360405206680298, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.9360405206680298, "logits_per_char": -0.9680202603340149, "bits_per_byte": 1.39655802906493, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 813, "native_id": "Mercury_401502", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9686511754989624, "logits_per_token_corr": -1.9686511754989624, "logits_per_char_corr": -0.9843255877494812, "bits_per_byte_corr": 1.420081644067311}, "model_output": [{"sum_logits": -1.130459189414978, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.130459189414978, "logits_per_char": -0.565229594707489, "bits_per_byte": 0.8154539332487589, "num_chars": 2}, {"sum_logits": -1.6766895055770874, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.6766895055770874, "logits_per_char": -0.8383447527885437, "bits_per_byte": 1.2094758174041849, "num_chars": 2}, {"sum_logits": -1.7634166479110718, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.7634166479110718, "logits_per_char": -0.8817083239555359, "bits_per_byte": 1.272036226482085, "num_chars": 2}, {"sum_logits": -1.9686511754989624, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.9686511754989624, "logits_per_char": -0.9843255877494812, "bits_per_byte": 1.420081644067311, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 814, "native_id": "Mercury_7109498", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0380175113677979, "logits_per_token_corr": -1.0380175113677979, "logits_per_char_corr": -0.5190087556838989, "bits_per_byte_corr": 0.7487713580036506}, "model_output": [{"sum_logits": -1.3048079013824463, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.3048079013824463, "logits_per_char": -0.6524039506912231, "bits_per_byte": 0.9412199443192717, "num_chars": 2}, {"sum_logits": -1.0380175113677979, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": true, "logits_per_token": -1.0380175113677979, "logits_per_char": -0.5190087556838989, "bits_per_byte": 0.7487713580036506, "num_chars": 2}, {"sum_logits": -1.4584906101226807, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.4584906101226807, "logits_per_char": -0.7292453050613403, "bits_per_byte": 1.0520785852043109, "num_chars": 2}, {"sum_logits": -2.0215952396392822, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -2.0215952396392822, "logits_per_char": -1.0107976198196411, "bits_per_byte": 1.4582727134572118, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 815, "native_id": "VASoL_2008_5_10", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4341516494750977, "logits_per_token_corr": -1.4341516494750977, "logits_per_char_corr": -0.7170758247375488, "bits_per_byte_corr": 1.0345217362909684}, "model_output": [{"sum_logits": -1.4341516494750977, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4341516494750977, "logits_per_char": -0.7170758247375488, "bits_per_byte": 1.0345217362909684, "num_chars": 2}, {"sum_logits": -1.1674203872680664, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.1674203872680664, "logits_per_char": -0.5837101936340332, "bits_per_byte": 0.8421158016727612, "num_chars": 2}, {"sum_logits": -1.3141489028930664, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3141489028930664, "logits_per_char": -0.6570744514465332, "bits_per_byte": 0.9479580525974305, "num_chars": 2}, {"sum_logits": -1.7915658950805664, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.7915658950805664, "logits_per_char": -0.8957829475402832, "bits_per_byte": 1.2923416161301935, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 816, "native_id": "MCAS_2006_9_4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.588092565536499, "logits_per_token_corr": -1.588092565536499, "logits_per_char_corr": -0.7940462827682495, "bits_per_byte_corr": 1.1455666343868922}, "model_output": [{"sum_logits": -1.588092565536499, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.588092565536499, "logits_per_char": -0.7940462827682495, "bits_per_byte": 1.1455666343868922, "num_chars": 2}, {"sum_logits": -1.0071141719818115, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": true, "logits_per_token": -1.0071141719818115, "logits_per_char": -0.5035570859909058, "bits_per_byte": 0.726479310764099, "num_chars": 2}, {"sum_logits": -1.3112542629241943, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.3112542629241943, "logits_per_char": -0.6556271314620972, "bits_per_byte": 0.9458700112333036, "num_chars": 2}, {"sum_logits": -1.9100701808929443, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.9100701808929443, "logits_per_char": -0.9550350904464722, "bits_per_byte": 1.377824388863058, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 817, "native_id": "Mercury_402341", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4883813858032227, "logits_per_token_corr": -1.4883813858032227, "logits_per_char_corr": -0.7441906929016113, "bits_per_byte_corr": 1.0736402221256476}, "model_output": [{"sum_logits": -1.522578239440918, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.522578239440918, "logits_per_char": -0.761289119720459, "bits_per_byte": 1.0983080377042196, "num_chars": 2}, {"sum_logits": -1.2729616165161133, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2729616165161133, "logits_per_char": -0.6364808082580566, "bits_per_byte": 0.9182477056955572, "num_chars": 2}, {"sum_logits": -1.3901777267456055, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3901777267456055, "logits_per_char": -0.6950888633728027, "bits_per_byte": 1.0028012561658093, "num_chars": 2}, {"sum_logits": -1.4883813858032227, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4883813858032227, "logits_per_char": -0.7441906929016113, "bits_per_byte": 1.0736402221256476, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 818, "native_id": "MCAS_2006_9_34", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4050240516662598, "logits_per_token_corr": -1.4050240516662598, "logits_per_char_corr": -0.7025120258331299, "bits_per_byte_corr": 1.013510615835044}, "model_output": [{"sum_logits": -1.5295815467834473, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.5295815467834473, "logits_per_char": -0.7647907733917236, "bits_per_byte": 1.1033598560906674, "num_chars": 2}, {"sum_logits": -1.4604496955871582, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4604496955871582, "logits_per_char": -0.7302248477935791, "bits_per_byte": 1.0534917666464516, "num_chars": 2}, {"sum_logits": -1.283277988433838, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.283277988433838, "logits_per_char": -0.641638994216919, "bits_per_byte": 0.9256893949983963, "num_chars": 2}, {"sum_logits": -1.4050240516662598, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4050240516662598, "logits_per_char": -0.7025120258331299, "bits_per_byte": 1.013510615835044, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 819, "native_id": "Mercury_7267715", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2511799335479736, "logits_per_token_corr": -1.2511799335479736, "logits_per_char_corr": -0.6255899667739868, "bits_per_byte_corr": 0.9025355426953706}, "model_output": [{"sum_logits": -1.4734547138214111, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.4734547138214111, "logits_per_char": -0.7367273569107056, "bits_per_byte": 1.062872904303072, "num_chars": 2}, {"sum_logits": -1.1544411182403564, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": true, "logits_per_token": -1.1544411182403564, "logits_per_char": -0.5772205591201782, "bits_per_byte": 0.8327532381424341, "num_chars": 2}, {"sum_logits": -1.2511799335479736, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.2511799335479736, "logits_per_char": -0.6255899667739868, "bits_per_byte": 0.9025355426953706, "num_chars": 2}, {"sum_logits": -1.8518555164337158, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.8518555164337158, "logits_per_char": -0.9259277582168579, "bits_per_byte": 1.3358313850018557, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 820, "native_id": "Mercury_SC_413089", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.116133213043213, "logits_per_token_corr": -2.116133213043213, "logits_per_char_corr": -1.0580666065216064, "bits_per_byte_corr": 1.5264674461600325}, "model_output": [{"sum_logits": -1.2401814460754395, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.2401814460754395, "logits_per_char": -0.6200907230377197, "bits_per_byte": 0.8946018110284126, "num_chars": 2}, {"sum_logits": -1.037142276763916, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.037142276763916, "logits_per_char": -0.518571138381958, "bits_per_byte": 0.7481400096923327, "num_chars": 2}, {"sum_logits": -1.4946904182434082, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4946904182434082, "logits_per_char": -0.7473452091217041, "bits_per_byte": 1.0781912270327825, "num_chars": 2}, {"sum_logits": -2.116133213043213, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -2.116133213043213, "logits_per_char": -1.0580666065216064, "bits_per_byte": 1.5264674461600325, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 821, "native_id": "Mercury_SC_401656", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2609320878982544, "logits_per_token_corr": -1.2609320878982544, "logits_per_char_corr": -0.6304660439491272, "bits_per_byte_corr": 0.9095702350549425}, "model_output": [{"sum_logits": -1.2609320878982544, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.2609320878982544, "logits_per_char": -0.6304660439491272, "bits_per_byte": 0.9095702350549425, "num_chars": 2}, {"sum_logits": -1.1379958391189575, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.1379958391189575, "logits_per_char": -0.5689979195594788, "bits_per_byte": 0.8208904768251871, "num_chars": 2}, {"sum_logits": -1.3886996507644653, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3886996507644653, "logits_per_char": -0.6943498253822327, "bits_per_byte": 1.0017350497217845, "num_chars": 2}, {"sum_logits": -2.0422163009643555, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -2.0422163009643555, "logits_per_char": -1.0211081504821777, "bits_per_byte": 1.4731476649129975, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 822, "native_id": "Mercury_407019", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8482201099395752, "logits_per_token_corr": -1.8482201099395752, "logits_per_char_corr": -0.9241100549697876, "bits_per_byte_corr": 1.3332089935414977}, "model_output": [{"sum_logits": -1.5341675281524658, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.5341675281524658, "logits_per_char": -0.7670837640762329, "bits_per_byte": 1.1066679423800159, "num_chars": 2}, {"sum_logits": -1.0560181140899658, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.0560181140899658, "logits_per_char": -0.5280090570449829, "bits_per_byte": 0.7617560481438019, "num_chars": 2}, {"sum_logits": -1.3593075275421143, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3593075275421143, "logits_per_char": -0.6796537637710571, "bits_per_byte": 0.9805331145147276, "num_chars": 2}, {"sum_logits": -1.8482201099395752, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.8482201099395752, "logits_per_char": -0.9241100549697876, "bits_per_byte": 1.3332089935414977, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 823, "native_id": "Mercury_417128", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.048633098602295, "logits_per_token_corr": -1.048633098602295, "logits_per_char_corr": -0.5243165493011475, "bits_per_byte_corr": 0.7564288855333225}, "model_output": [{"sum_logits": -1.7488760948181152, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.7488760948181152, "logits_per_char": -0.8744380474090576, "bits_per_byte": 1.2615474345625821, "num_chars": 2}, {"sum_logits": -1.048633098602295, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.048633098602295, "logits_per_char": -0.5243165493011475, "bits_per_byte": 0.7564288855333225, "num_chars": 2}, {"sum_logits": -1.3341708183288574, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.3341708183288574, "logits_per_char": -0.6670854091644287, "bits_per_byte": 0.9624008116515977, "num_chars": 2}, {"sum_logits": -1.603959560394287, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.603959560394287, "logits_per_char": -0.8019797801971436, "bits_per_byte": 1.1570122517844712, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 824, "native_id": "Mercury_7081305", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.742504596710205, "logits_per_token_corr": -1.742504596710205, "logits_per_char_corr": -0.8712522983551025, "bits_per_byte_corr": 1.256951370200921}, "model_output": [{"sum_logits": -1.2107586860656738, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.2107586860656738, "logits_per_char": -0.6053793430328369, "bits_per_byte": 0.8733777760507199, "num_chars": 2}, {"sum_logits": -1.2457451820373535, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.2457451820373535, "logits_per_char": -0.6228725910186768, "bits_per_byte": 0.8986151981689501, "num_chars": 2}, {"sum_logits": -1.4793648719787598, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4793648719787598, "logits_per_char": -0.7396824359893799, "bits_per_byte": 1.0671361822353131, "num_chars": 2}, {"sum_logits": -1.742504596710205, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.742504596710205, "logits_per_char": -0.8712522983551025, "bits_per_byte": 1.256951370200921, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 825, "native_id": "NYSEDREGENTS_2015_8_3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4865366220474243, "logits_per_token_corr": -1.4865366220474243, "logits_per_char_corr": -0.7432683110237122, "bits_per_byte_corr": 1.0723095063645955}, "model_output": [{"sum_logits": -1.5064226388931274, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.5064226388931274, "logits_per_char": -0.7532113194465637, "bits_per_byte": 1.086654235307771, "num_chars": 2}, {"sum_logits": -1.4865366220474243, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4865366220474243, "logits_per_char": -0.7432683110237122, "bits_per_byte": 1.0723095063645955, "num_chars": 2}, {"sum_logits": -1.1172701120376587, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.1172701120376587, "logits_per_char": -0.5586350560188293, "bits_per_byte": 0.8059400249856724, "num_chars": 2}, {"sum_logits": -1.5654150247573853, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.5654150247573853, "logits_per_char": -0.7827075123786926, "bits_per_byte": 1.129208246576088, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 826, "native_id": "MEA_2016_8_15", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7058595418930054, "logits_per_token_corr": -1.7058595418930054, "logits_per_char_corr": -0.8529297709465027, "bits_per_byte_corr": 1.230517550771963}, "model_output": [{"sum_logits": -1.7058595418930054, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.7058595418930054, "logits_per_char": -0.8529297709465027, "bits_per_byte": 1.230517550771963, "num_chars": 2}, {"sum_logits": -1.1938966512680054, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": true, "logits_per_token": -1.1938966512680054, "logits_per_char": -0.5969483256340027, "bits_per_byte": 0.8612143890597644, "num_chars": 2}, {"sum_logits": -1.276227355003357, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.276227355003357, "logits_per_char": -0.6381136775016785, "bits_per_byte": 0.9206034380557522, "num_chars": 2}, {"sum_logits": -1.5417894124984741, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.5417894124984741, "logits_per_char": -0.7708947062492371, "bits_per_byte": 1.1121659697541275, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 827, "native_id": "ACTAAP_2015_7_9", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8788490295410156, "logits_per_token_corr": -1.8788490295410156, "logits_per_char_corr": -0.9394245147705078, "bits_per_byte_corr": 1.355303088749906}, "model_output": [{"sum_logits": -1.4340705871582031, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4340705871582031, "logits_per_char": -0.7170352935791016, "bits_per_byte": 1.034463262189675, "num_chars": 2}, {"sum_logits": -0.9706555008888245, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -0.9706555008888245, "logits_per_char": -0.48532775044441223, "bits_per_byte": 0.700179938772453, "num_chars": 2}, {"sum_logits": -1.6123924255371094, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.6123924255371094, "logits_per_char": -0.8061962127685547, "bits_per_byte": 1.163095278145493, "num_chars": 2}, {"sum_logits": -1.8788490295410156, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.8788490295410156, "logits_per_char": -0.9394245147705078, "bits_per_byte": 1.355303088749906, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 828, "native_id": "Mercury_7216423", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2674241065979004, "logits_per_token_corr": -1.2674241065979004, "logits_per_char_corr": -0.6337120532989502, "bits_per_byte_corr": 0.9142532366466147}, "model_output": [{"sum_logits": -1.4941811561584473, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4941811561584473, "logits_per_char": -0.7470905780792236, "bits_per_byte": 1.077823872090539, "num_chars": 2}, {"sum_logits": -1.2674241065979004, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.2674241065979004, "logits_per_char": -0.6337120532989502, "bits_per_byte": 0.9142532366466147, "num_chars": 2}, {"sum_logits": -1.301546573638916, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.301546573638916, "logits_per_char": -0.650773286819458, "bits_per_byte": 0.9388673936381176, "num_chars": 2}, {"sum_logits": -1.601879596710205, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.601879596710205, "logits_per_char": -0.8009397983551025, "bits_per_byte": 1.1555118751383429, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 829, "native_id": "Mercury_416633", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4446847438812256, "logits_per_token_corr": -1.4446847438812256, "logits_per_char_corr": -0.7223423719406128, "bits_per_byte_corr": 1.0421197578234418}, "model_output": [{"sum_logits": -1.4446847438812256, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4446847438812256, "logits_per_char": -0.7223423719406128, "bits_per_byte": 1.0421197578234418, "num_chars": 2}, {"sum_logits": -1.2632291316986084, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2632291316986084, "logits_per_char": -0.6316145658493042, "bits_per_byte": 0.9112272019046814, "num_chars": 2}, {"sum_logits": -1.3741319179534912, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3741319179534912, "logits_per_char": -0.6870659589767456, "bits_per_byte": 0.991226651780083, "num_chars": 2}, {"sum_logits": -1.5746896266937256, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5746896266937256, "logits_per_char": -0.7873448133468628, "bits_per_byte": 1.1358984576859816, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 830, "native_id": "Mercury_7038518", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4807977676391602, "logits_per_token_corr": -1.4807977676391602, "logits_per_char_corr": -0.7403988838195801, "bits_per_byte_corr": 1.0681697979669993}, "model_output": [{"sum_logits": -1.3714380264282227, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3714380264282227, "logits_per_char": -0.6857190132141113, "bits_per_byte": 0.9892834198079827, "num_chars": 2}, {"sum_logits": -1.4800844192504883, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4800844192504883, "logits_per_char": -0.7400422096252441, "bits_per_byte": 1.0676552258756176, "num_chars": 2}, {"sum_logits": -1.3020734786987305, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.3020734786987305, "logits_per_char": -0.6510367393493652, "bits_per_byte": 0.9392474752965246, "num_chars": 2}, {"sum_logits": -1.4807977676391602, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4807977676391602, "logits_per_char": -0.7403988838195801, "bits_per_byte": 1.0681697979669993, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 831, "native_id": "Mercury_7085225", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.219969630241394, "logits_per_token_corr": -1.219969630241394, "logits_per_char_corr": -0.609984815120697, "bits_per_byte_corr": 0.880022067792833}, "model_output": [{"sum_logits": -1.298319697380066, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.298319697380066, "logits_per_char": -0.649159848690033, "bits_per_byte": 0.9365396944500133, "num_chars": 2}, {"sum_logits": -1.219969630241394, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.219969630241394, "logits_per_char": -0.609984815120697, "bits_per_byte": 0.880022067792833, "num_chars": 2}, {"sum_logits": -1.4346684217453003, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4346684217453003, "logits_per_char": -0.7173342108726501, "bits_per_byte": 1.0348945086867138, "num_chars": 2}, {"sum_logits": -1.7356022596359253, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.7356022596359253, "logits_per_char": -0.8678011298179626, "bits_per_byte": 1.2519723864671137, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 832, "native_id": "LEAP__4_10225", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0704381465911865, "logits_per_token_corr": -1.0704381465911865, "logits_per_char_corr": -0.5352190732955933, "bits_per_byte_corr": 0.7721579028332938}, "model_output": [{"sum_logits": -1.3101270198822021, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.3101270198822021, "logits_per_char": -0.6550635099411011, "bits_per_byte": 0.9450568772600236, "num_chars": 2}, {"sum_logits": -1.0704381465911865, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.0704381465911865, "logits_per_char": -0.5352190732955933, "bits_per_byte": 0.7721579028332938, "num_chars": 2}, {"sum_logits": -1.4860360622406006, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4860360622406006, "logits_per_char": -0.7430180311203003, "bits_per_byte": 1.0719484287891088, "num_chars": 2}, {"sum_logits": -1.9669549465179443, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.9669549465179443, "logits_per_char": -0.9834774732589722, "bits_per_byte": 1.4188580734977467, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 833, "native_id": "Mercury_SC_401661", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6281566619873047, "logits_per_token_corr": -1.6281566619873047, "logits_per_char_corr": -0.8140783309936523, "bits_per_byte_corr": 1.17446677102055}, "model_output": [{"sum_logits": -1.206827163696289, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.206827163696289, "logits_per_char": -0.6034135818481445, "bits_per_byte": 0.8705417821379902, "num_chars": 2}, {"sum_logits": -1.2796707153320312, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.2796707153320312, "logits_per_char": -0.6398353576660156, "bits_per_byte": 0.9230872974908401, "num_chars": 2}, {"sum_logits": -1.5419635772705078, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.5419635772705078, "logits_per_char": -0.7709817886352539, "bits_per_byte": 1.112291603080583, "num_chars": 2}, {"sum_logits": -1.6281566619873047, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.6281566619873047, "logits_per_char": -0.8140783309936523, "bits_per_byte": 1.17446677102055, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 834, "native_id": "TIMSS_1995_8_Q15", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5240442752838135, "logits_per_token_corr": -1.5240442752838135, "logits_per_char_corr": -0.7620221376419067, "bits_per_byte_corr": 1.0993655590243758}, "model_output": [{"sum_logits": -1.5240442752838135, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.5240442752838135, "logits_per_char": -0.7620221376419067, "bits_per_byte": 1.0993655590243758, "num_chars": 2}, {"sum_logits": -1.1096155643463135, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.1096155643463135, "logits_per_char": -0.5548077821731567, "bits_per_byte": 0.8004184359883925, "num_chars": 2}, {"sum_logits": -1.586355447769165, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.586355447769165, "logits_per_char": -0.7931777238845825, "bits_per_byte": 1.1443135687927048, "num_chars": 2}, {"sum_logits": -1.4666082859039307, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4666082859039307, "logits_per_char": -0.7333041429519653, "bits_per_byte": 1.057934250500892, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 835, "native_id": "MCAS_1999_4_23", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3490619659423828, "logits_per_token_corr": -1.3490619659423828, "logits_per_char_corr": -0.6745309829711914, "bits_per_byte_corr": 0.9731425040591948}, "model_output": [{"sum_logits": -1.3490619659423828, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.3490619659423828, "logits_per_char": -0.6745309829711914, "bits_per_byte": 0.9731425040591948, "num_chars": 2}, {"sum_logits": -0.9630680680274963, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -0.9630680680274963, "logits_per_char": -0.48153403401374817, "bits_per_byte": 0.694706762891391, "num_chars": 2}, {"sum_logits": -1.4328632354736328, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4328632354736328, "logits_per_char": -0.7164316177368164, "bits_per_byte": 1.0335923420457052, "num_chars": 2}, {"sum_logits": -2.2295398712158203, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -2.2295398712158203, "logits_per_char": -1.1147699356079102, "bits_per_byte": 1.6082730578347966, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 836, "native_id": "TIMSS_1995_8_J7", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7407931089401245, "logits_per_token_corr": -1.7407931089401245, "logits_per_char_corr": -0.8703965544700623, "bits_per_byte_corr": 1.2557167927417014}, "model_output": [{"sum_logits": -1.5905035734176636, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.5905035734176636, "logits_per_char": -0.7952517867088318, "bits_per_byte": 1.1473058089437436, "num_chars": 2}, {"sum_logits": -1.2478350400924683, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.2478350400924683, "logits_per_char": -0.6239175200462341, "bits_per_byte": 0.9001227120950891, "num_chars": 2}, {"sum_logits": -1.2218455076217651, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": true, "logits_per_token": -1.2218455076217651, "logits_per_char": -0.6109227538108826, "bits_per_byte": 0.8813752272898225, "num_chars": 2}, {"sum_logits": -1.7407931089401245, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.7407931089401245, "logits_per_char": -0.8703965544700623, "bits_per_byte": 1.2557167927417014, "num_chars": 2}, {"sum_logits": -3.44964599609375, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -3.44964599609375, "logits_per_char": -1.724822998046875, "bits_per_byte": 2.4883935856952486, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 837, "native_id": "Mercury_SC_LBS10018", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0437921285629272, "logits_per_token_corr": -1.0437921285629272, "logits_per_char_corr": -0.5218960642814636, "bits_per_byte_corr": 0.7529368637988761}, "model_output": [{"sum_logits": -1.0437921285629272, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.0437921285629272, "logits_per_char": -0.5218960642814636, "bits_per_byte": 0.7529368637988761, "num_chars": 2}, {"sum_logits": -1.3177942037582397, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3177942037582397, "logits_per_char": -0.6588971018791199, "bits_per_byte": 0.9505875813377993, "num_chars": 2}, {"sum_logits": -1.535096526145935, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.535096526145935, "logits_per_char": -0.7675482630729675, "bits_per_byte": 1.1073380727791033, "num_chars": 2}, {"sum_logits": -1.996390700340271, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.996390700340271, "logits_per_char": -0.9981953501701355, "bits_per_byte": 1.4400914815299113, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 838, "native_id": "Mercury_SC_406855", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.0495662689208984, "logits_per_token_corr": -2.0495662689208984, "logits_per_char_corr": -1.0247831344604492, "bits_per_byte_corr": 1.4784495460738}, "model_output": [{"sum_logits": -1.1811180114746094, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.1811180114746094, "logits_per_char": -0.5905590057373047, "bits_per_byte": 0.8519965489301384, "num_chars": 2}, {"sum_logits": -1.0696296691894531, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.0696296691894531, "logits_per_char": -0.5348148345947266, "bits_per_byte": 0.7715747096642176, "num_chars": 2}, {"sum_logits": -1.557870864868164, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.557870864868164, "logits_per_char": -0.778935432434082, "bits_per_byte": 1.1237662855461577, "num_chars": 2}, {"sum_logits": -2.0495662689208984, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -2.0495662689208984, "logits_per_char": -1.0247831344604492, "bits_per_byte": 1.4784495460738, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 839, "native_id": "Mercury_SC_415457", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2992990016937256, "logits_per_token_corr": -1.2992990016937256, "logits_per_char_corr": -0.6496495008468628, "bits_per_byte_corr": 0.9372461131884328}, "model_output": [{"sum_logits": -1.6610944271087646, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.6610944271087646, "logits_per_char": -0.8305472135543823, "bits_per_byte": 1.198226346219915, "num_chars": 2}, {"sum_logits": -1.3064401149749756, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3064401149749756, "logits_per_char": -0.6532200574874878, "bits_per_byte": 0.9423973375470793, "num_chars": 2}, {"sum_logits": -1.388599157333374, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.388599157333374, "logits_per_char": -0.694299578666687, "bits_per_byte": 1.0016625590344457, "num_chars": 2}, {"sum_logits": -1.2992990016937256, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.2992990016937256, "logits_per_char": -0.6496495008468628, "bits_per_byte": 0.9372461131884328, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 840, "native_id": "NYSEDREGENTS_2015_4_25", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2653664350509644, "logits_per_token_corr": -1.2653664350509644, "logits_per_char_corr": -0.6326832175254822, "bits_per_byte_corr": 0.9127689403783422}, "model_output": [{"sum_logits": -1.70262610912323, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.70262610912323, "logits_per_char": -0.851313054561615, "bits_per_byte": 1.2281851220609599, "num_chars": 2}, {"sum_logits": -1.645790934562683, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.645790934562683, "logits_per_char": -0.8228954672813416, "bits_per_byte": 1.1871872098176506, "num_chars": 2}, {"sum_logits": -1.2653664350509644, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.2653664350509644, "logits_per_char": -0.6326832175254822, "bits_per_byte": 0.9127689403783422, "num_chars": 2}, {"sum_logits": -1.1198433637619019, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.1198433637619019, "logits_per_char": -0.5599216818809509, "bits_per_byte": 0.807796233736436, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 841, "native_id": "Mercury_7058135", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1766698360443115, "logits_per_token_corr": -1.1766698360443115, "logits_per_char_corr": -0.5883349180221558, "bits_per_byte_corr": 0.8487878686129887}, "model_output": [{"sum_logits": -1.2564904689788818, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.2564904689788818, "logits_per_char": -0.6282452344894409, "bits_per_byte": 0.9063662842606915, "num_chars": 2}, {"sum_logits": -1.1766698360443115, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.1766698360443115, "logits_per_char": -0.5883349180221558, "bits_per_byte": 0.8487878686129887, "num_chars": 2}, {"sum_logits": -1.3460729122161865, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3460729122161865, "logits_per_char": -0.6730364561080933, "bits_per_byte": 0.9709863575653263, "num_chars": 2}, {"sum_logits": -2.0561959743499756, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -2.0561959743499756, "logits_per_char": -1.0280979871749878, "bits_per_byte": 1.4832318676463456, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 842, "native_id": "MDSA_2008_4_19", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1558518409729004, "logits_per_token_corr": -1.1558518409729004, "logits_per_char_corr": -0.5779259204864502, "bits_per_byte_corr": 0.8337708594875901}, "model_output": [{"sum_logits": -1.43461275100708, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.43461275100708, "logits_per_char": -0.71730637550354, "bits_per_byte": 1.0348543507377372, "num_chars": 2}, {"sum_logits": -1.1558518409729004, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.1558518409729004, "logits_per_char": -0.5779259204864502, "bits_per_byte": 0.8337708594875901, "num_chars": 2}, {"sum_logits": -1.2441506385803223, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.2441506385803223, "logits_per_char": -0.6220753192901611, "bits_per_byte": 0.8974649781999787, "num_chars": 2}, {"sum_logits": -2.016258716583252, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -2.016258716583252, "logits_per_char": -1.008129358291626, "bits_per_byte": 1.4544232257829466, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 843, "native_id": "AKDE&ED_2008_8_45", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5853760242462158, "logits_per_token_corr": -1.5853760242462158, "logits_per_char_corr": -0.7926880121231079, "bits_per_byte_corr": 1.1436070640629599}, "model_output": [{"sum_logits": -1.759272813796997, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.759272813796997, "logits_per_char": -0.8796364068984985, "bits_per_byte": 1.2690470820187618, "num_chars": 2}, {"sum_logits": -1.240617036819458, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.240617036819458, "logits_per_char": -0.620308518409729, "bits_per_byte": 0.8949160233315392, "num_chars": 2}, {"sum_logits": -1.1358330249786377, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.1358330249786377, "logits_per_char": -0.5679165124893188, "bits_per_byte": 0.819330336207884, "num_chars": 2}, {"sum_logits": -1.5853760242462158, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.5853760242462158, "logits_per_char": -0.7926880121231079, "bits_per_byte": 1.1436070640629599, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 844, "native_id": "Mercury_7131758", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5203807353973389, "logits_per_token_corr": -1.5203807353973389, "logits_per_char_corr": -0.7601903676986694, "bits_per_byte_corr": 1.096722873611216}, "model_output": [{"sum_logits": -1.5203807353973389, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.5203807353973389, "logits_per_char": -0.7601903676986694, "bits_per_byte": 1.096722873611216, "num_chars": 2}, {"sum_logits": -1.2167308330535889, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.2167308330535889, "logits_per_char": -0.6083654165267944, "bits_per_byte": 0.8776857694721855, "num_chars": 2}, {"sum_logits": -1.402489423751831, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.402489423751831, "logits_per_char": -0.7012447118759155, "bits_per_byte": 1.01168226827372, "num_chars": 2}, {"sum_logits": -1.5538661479949951, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.5538661479949951, "logits_per_char": -0.7769330739974976, "bits_per_byte": 1.120877492959613, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 845, "native_id": "NYSEDREGENTS_2013_8_10", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2302179336547852, "logits_per_token_corr": -1.2302179336547852, "logits_per_char_corr": -0.6151089668273926, "bits_per_byte_corr": 0.8874146560488507}, "model_output": [{"sum_logits": -1.2713956832885742, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.2713956832885742, "logits_per_char": -0.6356978416442871, "bits_per_byte": 0.9171181236446895, "num_chars": 2}, {"sum_logits": -1.2302179336547852, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.2302179336547852, "logits_per_char": -0.6151089668273926, "bits_per_byte": 0.8874146560488507, "num_chars": 2}, {"sum_logits": -1.4636754989624023, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4636754989624023, "logits_per_char": -0.7318377494812012, "bits_per_byte": 1.0558186919126271, "num_chars": 2}, {"sum_logits": -1.739211082458496, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.739211082458496, "logits_per_char": -0.869605541229248, "bits_per_byte": 1.2545756018619005, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 846, "native_id": "Mercury_SC_401783", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7005563974380493, "logits_per_token_corr": -1.7005563974380493, "logits_per_char_corr": -0.8502781987190247, "bits_per_byte_corr": 1.2266921406688187}, "model_output": [{"sum_logits": -1.3609625101089478, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3609625101089478, "logits_per_char": -0.6804812550544739, "bits_per_byte": 0.9817269320856926, "num_chars": 2}, {"sum_logits": -1.2388445138931274, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.2388445138931274, "logits_per_char": -0.6194222569465637, "bits_per_byte": 0.8936374183136988, "num_chars": 2}, {"sum_logits": -1.3740984201431274, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3740984201431274, "logits_per_char": -0.6870492100715637, "bits_per_byte": 0.9912024882176368, "num_chars": 2}, {"sum_logits": -1.7005563974380493, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.7005563974380493, "logits_per_char": -0.8502781987190247, "bits_per_byte": 1.2266921406688187, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 847, "native_id": "Mercury_7190120", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0101591348648071, "logits_per_token_corr": -1.0101591348648071, "logits_per_char_corr": -0.5050795674324036, "bits_per_byte_corr": 0.7286757871895949}, "model_output": [{"sum_logits": -1.382017731666565, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.382017731666565, "logits_per_char": -0.6910088658332825, "bits_per_byte": 0.9969150639486999, "num_chars": 2}, {"sum_logits": -1.0101591348648071, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -1.0101591348648071, "logits_per_char": -0.5050795674324036, "bits_per_byte": 0.7286757871895949, "num_chars": 2}, {"sum_logits": -1.4119592905044556, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4119592905044556, "logits_per_char": -0.7059796452522278, "bits_per_byte": 1.0185133331746703, "num_chars": 2}, {"sum_logits": -2.065493106842041, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -2.065493106842041, "logits_per_char": -1.0327465534210205, "bits_per_byte": 1.4899383311167456, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 848, "native_id": "Mercury_409317", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.555834174156189, "logits_per_token_corr": -1.555834174156189, "logits_per_char_corr": -0.7779170870780945, "bits_per_byte_corr": 1.1222971237511612}, "model_output": [{"sum_logits": -1.5718311071395874, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.5718311071395874, "logits_per_char": -0.7859155535697937, "bits_per_byte": 1.1338364716934604, "num_chars": 2}, {"sum_logits": -1.555834174156189, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.555834174156189, "logits_per_char": -0.7779170870780945, "bits_per_byte": 1.1222971237511612, "num_chars": 2}, {"sum_logits": -1.2072852849960327, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": true, "logits_per_token": -1.2072852849960327, "logits_per_char": -0.6036426424980164, "bits_per_byte": 0.8708722468016233, "num_chars": 2}, {"sum_logits": -1.509630560874939, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.509630560874939, "logits_per_char": -0.7548152804374695, "bits_per_byte": 1.0889682618751317, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 849, "native_id": "Mercury_7268240", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6314036846160889, "logits_per_token_corr": -1.6314036846160889, "logits_per_char_corr": -0.8157018423080444, "bits_per_byte_corr": 1.1768090027426523}, "model_output": [{"sum_logits": -1.3315017223358154, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3315017223358154, "logits_per_char": -0.6657508611679077, "bits_per_byte": 0.9604754658751873, "num_chars": 2}, {"sum_logits": -1.328840970993042, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.328840970993042, "logits_per_char": -0.664420485496521, "bits_per_byte": 0.9585561394915569, "num_chars": 2}, {"sum_logits": -1.3381755352020264, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3381755352020264, "logits_per_char": -0.6690877676010132, "bits_per_byte": 0.9652896042381425, "num_chars": 2}, {"sum_logits": -1.6314036846160889, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.6314036846160889, "logits_per_char": -0.8157018423080444, "bits_per_byte": 1.1768090027426523, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 850, "native_id": "Mercury_7228358", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.0575013160705566, "logits_per_token_corr": -2.0575013160705566, "logits_per_char_corr": -1.0287506580352783, "bits_per_byte_corr": 1.4841734726598201}, "model_output": [{"sum_logits": -1.3438345193862915, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.3438345193862915, "logits_per_char": -0.6719172596931458, "bits_per_byte": 0.9693716984476995, "num_chars": 2}, {"sum_logits": -1.05978262424469, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": true, "logits_per_token": -1.05978262424469, "logits_per_char": -0.529891312122345, "bits_per_byte": 0.7644715682096022, "num_chars": 2}, {"sum_logits": -1.38387930393219, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.38387930393219, "logits_per_char": -0.691939651966095, "bits_per_byte": 0.9982579044866376, "num_chars": 2}, {"sum_logits": -2.0575013160705566, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -2.0575013160705566, "logits_per_char": -1.0287506580352783, "bits_per_byte": 1.4841734726598201, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 851, "native_id": "MCAS_2004_5_33", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.44633150100708, "logits_per_token_corr": -1.44633150100708, "logits_per_char_corr": -0.72316575050354, "bits_per_byte_corr": 1.0433076419929521}, "model_output": [{"sum_logits": -1.3500065803527832, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.3500065803527832, "logits_per_char": -0.6750032901763916, "bits_per_byte": 0.9738238993219137, "num_chars": 2}, {"sum_logits": -0.9964013695716858, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": true, "logits_per_token": -0.9964013695716858, "logits_per_char": -0.4982006847858429, "bits_per_byte": 0.7187516573085375, "num_chars": 2}, {"sum_logits": -1.44633150100708, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.44633150100708, "logits_per_char": -0.72316575050354, "bits_per_byte": 1.0433076419929521, "num_chars": 2}, {"sum_logits": -2.0638928413391113, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -2.0638928413391113, "logits_per_char": -1.0319464206695557, "bits_per_byte": 1.4887839835641536, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 852, "native_id": "Mercury_7008855", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.141502022743225, "logits_per_token_corr": -1.141502022743225, "logits_per_char_corr": -0.5707510113716125, "bits_per_byte_corr": 0.8234196536887773}, "model_output": [{"sum_logits": -1.2980877161026, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.2980877161026, "logits_per_char": -0.6490438580513, "bits_per_byte": 0.9363723553307236, "num_chars": 2}, {"sum_logits": -1.141502022743225, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.141502022743225, "logits_per_char": -0.5707510113716125, "bits_per_byte": 0.8234196536887773, "num_chars": 2}, {"sum_logits": -1.371005654335022, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.371005654335022, "logits_per_char": -0.685502827167511, "bits_per_byte": 0.9889715292706428, "num_chars": 2}, {"sum_logits": -1.948999047279358, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.948999047279358, "logits_per_char": -0.974499523639679, "bits_per_byte": 1.405905630104632, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 853, "native_id": "Mercury_7057085", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1045714616775513, "logits_per_token_corr": -1.1045714616775513, "logits_per_char_corr": -0.5522857308387756, "bits_per_byte_corr": 0.7967798850354109}, "model_output": [{"sum_logits": -1.4433261156082153, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4433261156082153, "logits_per_char": -0.7216630578041077, "bits_per_byte": 1.0411397146874994, "num_chars": 2}, {"sum_logits": -1.1045714616775513, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.1045714616775513, "logits_per_char": -0.5522857308387756, "bits_per_byte": 0.7967798850354109, "num_chars": 2}, {"sum_logits": -1.2229739427566528, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.2229739427566528, "logits_per_char": -0.6114869713783264, "bits_per_byte": 0.8821892211763568, "num_chars": 2}, {"sum_logits": -2.087895393371582, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -2.087895393371582, "logits_per_char": -1.043947696685791, "bits_per_byte": 1.5060981649571286, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 854, "native_id": "Mercury_7171728", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.304961919784546, "logits_per_token_corr": -1.304961919784546, "logits_per_char_corr": -0.652480959892273, "bits_per_byte_corr": 0.941331045111729}, "model_output": [{"sum_logits": -1.553842306137085, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.553842306137085, "logits_per_char": -0.7769211530685425, "bits_per_byte": 1.1208602946945267, "num_chars": 2}, {"sum_logits": -1.304961919784546, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.304961919784546, "logits_per_char": -0.652480959892273, "bits_per_byte": 0.941331045111729, "num_chars": 2}, {"sum_logits": -1.220214605331421, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.220214605331421, "logits_per_char": -0.6101073026657104, "bits_per_byte": 0.8801987799665947, "num_chars": 2}, {"sum_logits": -1.6828248500823975, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.6828248500823975, "logits_per_char": -0.8414124250411987, "bits_per_byte": 1.2139015329501661, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 855, "native_id": "NAEP_2005_4_S14+3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1075308322906494, "logits_per_token_corr": -1.1075308322906494, "logits_per_char_corr": -0.5537654161453247, "bits_per_byte_corr": 0.798914619689247}, "model_output": [{"sum_logits": -1.7906668186187744, "num_tokens": 1, "num_tokens_all": 499, "is_greedy": false, "logits_per_token": -1.7906668186187744, "logits_per_char": -0.8953334093093872, "bits_per_byte": 1.2916930695537894, "num_chars": 2}, {"sum_logits": -1.3644564151763916, "num_tokens": 1, "num_tokens_all": 499, "is_greedy": false, "logits_per_token": -1.3644564151763916, "logits_per_char": -0.6822282075881958, "bits_per_byte": 0.9842472518427635, "num_chars": 2}, {"sum_logits": -1.1075308322906494, "num_tokens": 1, "num_tokens_all": 499, "is_greedy": true, "logits_per_token": -1.1075308322906494, "logits_per_char": -0.5537654161453247, "bits_per_byte": 0.798914619689247, "num_chars": 2}, {"sum_logits": -1.440857172012329, "num_tokens": 1, "num_tokens_all": 499, "is_greedy": false, "logits_per_token": -1.440857172012329, "logits_per_char": -0.7204285860061646, "bits_per_byte": 1.0393587483464883, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 856, "native_id": "Mercury_7024395", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2991234064102173, "logits_per_token_corr": -1.2991234064102173, "logits_per_char_corr": -0.6495617032051086, "bits_per_byte_corr": 0.9371194479660722}, "model_output": [{"sum_logits": -1.2991234064102173, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.2991234064102173, "logits_per_char": -0.6495617032051086, "bits_per_byte": 0.9371194479660722, "num_chars": 2}, {"sum_logits": -1.1863476037979126, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": true, "logits_per_token": -1.1863476037979126, "logits_per_char": -0.5931738018989563, "bits_per_byte": 0.8557689023854914, "num_chars": 2}, {"sum_logits": -1.4180370569229126, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.4180370569229126, "logits_per_char": -0.7090185284614563, "bits_per_byte": 1.0228975149104682, "num_chars": 2}, {"sum_logits": -1.8664242029190063, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.8664242029190063, "logits_per_char": -0.9332121014595032, "bits_per_byte": 1.3463404708741606, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 857, "native_id": "NYSEDREGENTS_2012_8_28", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.84446382522583, "logits_per_token_corr": -1.84446382522583, "logits_per_char_corr": -0.922231912612915, "bits_per_byte_corr": 1.3304994068771523}, "model_output": [{"sum_logits": -0.8863416314125061, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -0.8863416314125061, "logits_per_char": -0.44317081570625305, "bits_per_byte": 0.6393603380865873, "num_chars": 2}, {"sum_logits": -1.0533814430236816, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.0533814430236816, "logits_per_char": -0.5266907215118408, "bits_per_byte": 0.7598540920079088, "num_chars": 2}, {"sum_logits": -1.84446382522583, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.84446382522583, "logits_per_char": -0.922231912612915, "bits_per_byte": 1.3304994068771523, "num_chars": 2}, {"sum_logits": -2.6022286415100098, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -2.6022286415100098, "logits_per_char": -1.3011143207550049, "bits_per_byte": 1.8771111781842063, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 858, "native_id": "Mercury_7090790", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4576671123504639, "logits_per_token_corr": -1.4576671123504639, "logits_per_char_corr": -0.7288335561752319, "bits_per_byte_corr": 1.0514845571282303}, "model_output": [{"sum_logits": -1.4576671123504639, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4576671123504639, "logits_per_char": -0.7288335561752319, "bits_per_byte": 1.0514845571282303, "num_chars": 2}, {"sum_logits": -1.3973453044891357, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3973453044891357, "logits_per_char": -0.6986726522445679, "bits_per_byte": 1.0079715705987016, "num_chars": 2}, {"sum_logits": -1.282585859298706, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.282585859298706, "logits_per_char": -0.641292929649353, "bits_per_byte": 0.9251901293629412, "num_chars": 2}, {"sum_logits": -1.4634692668914795, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4634692668914795, "logits_per_char": -0.7317346334457397, "bits_per_byte": 1.0556699269196306, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 859, "native_id": "TIMSS_2003_8_pg87", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3484728336334229, "logits_per_token_corr": -1.3484728336334229, "logits_per_char_corr": -0.6742364168167114, "bits_per_byte_corr": 0.9727175349289126}, "model_output": [{"sum_logits": -1.628845453262329, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.628845453262329, "logits_per_char": -0.8144227266311646, "bits_per_byte": 1.1749636288988932, "num_chars": 2}, {"sum_logits": -1.3484728336334229, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.3484728336334229, "logits_per_char": -0.6742364168167114, "bits_per_byte": 0.9727175349289126, "num_chars": 2}, {"sum_logits": -1.2770636081695557, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.2770636081695557, "logits_per_char": -0.6385318040847778, "bits_per_byte": 0.921206667203654, "num_chars": 2}, {"sum_logits": -1.431682825088501, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.431682825088501, "logits_per_char": -0.7158414125442505, "bits_per_byte": 1.0327408559412827, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 860, "native_id": "Mercury_SC_407382", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4167320728302002, "logits_per_token_corr": -1.4167320728302002, "logits_per_char_corr": -0.7083660364151001, "bits_per_byte_corr": 1.0219561678709699}, "model_output": [{"sum_logits": -1.242194414138794, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.242194414138794, "logits_per_char": -0.621097207069397, "bits_per_byte": 0.8960538605496484, "num_chars": 2}, {"sum_logits": -1.0754501819610596, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.0754501819610596, "logits_per_char": -0.5377250909805298, "bits_per_byte": 0.7757733221197344, "num_chars": 2}, {"sum_logits": -1.4167320728302002, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4167320728302002, "logits_per_char": -0.7083660364151001, "bits_per_byte": 1.0219561678709699, "num_chars": 2}, {"sum_logits": -2.119563341140747, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -2.119563341140747, "logits_per_char": -1.0597816705703735, "bits_per_byte": 1.5289417605579974, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 861, "native_id": "MDSA_2010_4_20", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2556829452514648, "logits_per_token_corr": -1.2556829452514648, "logits_per_char_corr": -0.6278414726257324, "bits_per_byte_corr": 0.9057837790222187}, "model_output": [{"sum_logits": -1.304438591003418, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.304438591003418, "logits_per_char": -0.652219295501709, "bits_per_byte": 0.940953543193085, "num_chars": 2}, {"sum_logits": -1.2556829452514648, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2556829452514648, "logits_per_char": -0.6278414726257324, "bits_per_byte": 0.9057837790222187, "num_chars": 2}, {"sum_logits": -1.3120660781860352, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3120660781860352, "logits_per_char": -0.6560330390930176, "bits_per_byte": 0.9464556121594919, "num_chars": 2}, {"sum_logits": -1.8385190963745117, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.8385190963745117, "logits_per_char": -0.9192595481872559, "bits_per_byte": 1.326211191460536, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 862, "native_id": "Mercury_SC_405019", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6455230712890625, "logits_per_token_corr": -1.6455230712890625, "logits_per_char_corr": -0.8227615356445312, "bits_per_byte_corr": 1.1869939873094062}, "model_output": [{"sum_logits": -1.5619754791259766, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.5619754791259766, "logits_per_char": -0.7809877395629883, "bits_per_byte": 1.126727138863414, "num_chars": 2}, {"sum_logits": -1.062398910522461, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": true, "logits_per_token": -1.062398910522461, "logits_per_char": -0.5311994552612305, "bits_per_byte": 0.7663588198288466, "num_chars": 2}, {"sum_logits": -1.4301128387451172, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.4301128387451172, "logits_per_char": -0.7150564193725586, "bits_per_byte": 1.0316083501853504, "num_chars": 2}, {"sum_logits": -1.6455230712890625, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.6455230712890625, "logits_per_char": -0.8227615356445312, "bits_per_byte": 1.1869939873094062, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 863, "native_id": "Mercury_7123078", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.058795213699341, "logits_per_token_corr": -2.058795213699341, "logits_per_char_corr": -1.0293976068496704, "bits_per_byte_corr": 1.4851068225060533}, "model_output": [{"sum_logits": -1.1937439441680908, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.1937439441680908, "logits_per_char": -0.5968719720840454, "bits_per_byte": 0.8611042341718868, "num_chars": 2}, {"sum_logits": -1.0522205829620361, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.0522205829620361, "logits_per_char": -0.5261102914810181, "bits_per_byte": 0.7590167084808571, "num_chars": 2}, {"sum_logits": -1.5759155750274658, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.5759155750274658, "logits_per_char": -0.7879577875137329, "bits_per_byte": 1.1367827924767187, "num_chars": 2}, {"sum_logits": -2.058795213699341, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -2.058795213699341, "logits_per_char": -1.0293976068496704, "bits_per_byte": 1.4851068225060533, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 864, "native_id": "Mercury_400084", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9291627407073975, "logits_per_token_corr": -1.9291627407073975, "logits_per_char_corr": -0.9645813703536987, "bits_per_byte_corr": 1.3915967595441616}, "model_output": [{"sum_logits": -1.9676949977874756, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.9676949977874756, "logits_per_char": -0.9838474988937378, "bits_per_byte": 1.419391907646025, "num_chars": 2}, {"sum_logits": -1.9128835201263428, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.9128835201263428, "logits_per_char": -0.9564417600631714, "bits_per_byte": 1.3798537841432406, "num_chars": 2}, {"sum_logits": -1.19523024559021, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.19523024559021, "logits_per_char": -0.597615122795105, "bits_per_byte": 0.8621763740173662, "num_chars": 2}, {"sum_logits": -1.9291627407073975, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.9291627407073975, "logits_per_char": -0.9645813703536987, "bits_per_byte": 1.3915967595441616, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 865, "native_id": "Mercury_7139650", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7492023706436157, "logits_per_token_corr": -1.7492023706436157, "logits_per_char_corr": -0.8746011853218079, "bits_per_byte_corr": 1.261782792820288}, "model_output": [{"sum_logits": -1.2476402521133423, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.2476402521133423, "logits_per_char": -0.6238201260566711, "bits_per_byte": 0.899982202269334, "num_chars": 2}, {"sum_logits": -1.2575126886367798, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.2575126886367798, "logits_per_char": -0.6287563443183899, "bits_per_byte": 0.9071036598762664, "num_chars": 2}, {"sum_logits": -1.7492023706436157, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.7492023706436157, "logits_per_char": -0.8746011853218079, "bits_per_byte": 1.261782792820288, "num_chars": 2}, {"sum_logits": -1.5843006372451782, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.5843006372451782, "logits_per_char": -0.7921503186225891, "bits_per_byte": 1.1428313363162426, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 866, "native_id": "Mercury_417150", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2513638734817505, "logits_per_token_corr": -1.2513638734817505, "logits_per_char_corr": -0.6256819367408752, "bits_per_byte_corr": 0.9026682273105113}, "model_output": [{"sum_logits": -1.585760235786438, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.585760235786438, "logits_per_char": -0.792880117893219, "bits_per_byte": 1.1438842141048255, "num_chars": 2}, {"sum_logits": -1.2513638734817505, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.2513638734817505, "logits_per_char": -0.6256819367408752, "bits_per_byte": 0.9026682273105113, "num_chars": 2}, {"sum_logits": -1.2593175172805786, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.2593175172805786, "logits_per_char": -0.6296587586402893, "bits_per_byte": 0.9084055685432988, "num_chars": 2}, {"sum_logits": -1.5465375185012817, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.5465375185012817, "logits_per_char": -0.7732687592506409, "bits_per_byte": 1.1155910042460628, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 867, "native_id": "Mercury_SC_402256", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2911590337753296, "logits_per_token_corr": -1.2911590337753296, "logits_per_char_corr": -0.6455795168876648, "bits_per_byte_corr": 0.9313743675139959}, "model_output": [{"sum_logits": -1.3902992010116577, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3902992010116577, "logits_per_char": -0.6951496005058289, "bits_per_byte": 1.0028888813264238, "num_chars": 2}, {"sum_logits": -1.4026340246200562, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4026340246200562, "logits_per_char": -0.7013170123100281, "bits_per_byte": 1.0117865757514686, "num_chars": 2}, {"sum_logits": -1.2911590337753296, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.2911590337753296, "logits_per_char": -0.6455795168876648, "bits_per_byte": 0.9313743675139959, "num_chars": 2}, {"sum_logits": -1.563555121421814, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.563555121421814, "logits_per_char": -0.781777560710907, "bits_per_byte": 1.1278666099167063, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 868, "native_id": "TIMSS_2007_8_pg53", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3189183473587036, "logits_per_token_corr": -1.3189183473587036, "logits_per_char_corr": -0.6594591736793518, "bits_per_byte_corr": 0.951398479536618}, "model_output": [{"sum_logits": -1.3189183473587036, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3189183473587036, "logits_per_char": -0.6594591736793518, "bits_per_byte": 0.951398479536618, "num_chars": 2}, {"sum_logits": -1.2154427766799927, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.2154427766799927, "logits_per_char": -0.6077213883399963, "bits_per_byte": 0.8767566332008985, "num_chars": 2}, {"sum_logits": -1.593542218208313, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.593542218208313, "logits_per_char": -0.7967711091041565, "bits_per_byte": 1.1494977278289915, "num_chars": 2}, {"sum_logits": -1.669469952583313, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.669469952583313, "logits_per_char": -0.8347349762916565, "bits_per_byte": 1.2042680107534045, "num_chars": 2}, {"sum_logits": -3.474029541015625, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -3.474029541015625, "logits_per_char": -1.7370147705078125, "bits_per_byte": 2.5059825953643022, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 869, "native_id": "MCAS_2006_9_17-v1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8427505493164062, "logits_per_token_corr": -1.8427505493164062, "logits_per_char_corr": -0.9213752746582031, "bits_per_byte_corr": 1.3292635395480512}, "model_output": [{"sum_logits": -1.5106277465820312, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.5106277465820312, "logits_per_char": -0.7553138732910156, "bits_per_byte": 1.089687579312366, "num_chars": 2}, {"sum_logits": -1.0848617553710938, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": true, "logits_per_token": -1.0848617553710938, "logits_per_char": -0.5424308776855469, "bits_per_byte": 0.7825623372625485, "num_chars": 2}, {"sum_logits": -1.3112316131591797, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.3112316131591797, "logits_per_char": -0.6556158065795898, "bits_per_byte": 0.9458536728814716, "num_chars": 2}, {"sum_logits": -1.8427505493164062, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.8427505493164062, "logits_per_char": -0.9213752746582031, "bits_per_byte": 1.3292635395480512, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 870, "native_id": "Mercury_401728", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.776172161102295, "logits_per_token_corr": -1.776172161102295, "logits_per_char_corr": -0.8880860805511475, "bits_per_byte_corr": 1.2812373842945775}, "model_output": [{"sum_logits": -1.3549952507019043, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3549952507019043, "logits_per_char": -0.6774976253509521, "bits_per_byte": 0.9774224643085697, "num_chars": 2}, {"sum_logits": -1.330491542816162, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.330491542816162, "logits_per_char": -0.665245771408081, "bits_per_byte": 0.959746775383481, "num_chars": 2}, {"sum_logits": -1.2422175407409668, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2422175407409668, "logits_per_char": -0.6211087703704834, "bits_per_byte": 0.8960705428667821, "num_chars": 2}, {"sum_logits": -1.776172161102295, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.776172161102295, "logits_per_char": -0.8880860805511475, "bits_per_byte": 1.2812373842945775, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 871, "native_id": "Mercury_7192798", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.200061321258545, "logits_per_token_corr": -1.200061321258545, "logits_per_char_corr": -0.6000306606292725, "bits_per_byte_corr": 0.8656612584718019}, "model_output": [{"sum_logits": -1.6913046836853027, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.6913046836853027, "logits_per_char": -0.8456523418426514, "bits_per_byte": 1.2200184398934082, "num_chars": 2}, {"sum_logits": -1.3422846794128418, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3422846794128418, "logits_per_char": -0.6711423397064209, "bits_per_byte": 0.9682537252257651, "num_chars": 2}, {"sum_logits": -1.200061321258545, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.200061321258545, "logits_per_char": -0.6000306606292725, "bits_per_byte": 0.8656612584718019, "num_chars": 2}, {"sum_logits": -1.4665064811706543, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4665064811706543, "logits_per_char": -0.7332532405853271, "bits_per_byte": 1.0578608139089736, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 872, "native_id": "Mercury_7221078", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2093238830566406, "logits_per_token_corr": -1.2093238830566406, "logits_per_char_corr": -0.6046619415283203, "bits_per_byte_corr": 0.8723427844578268}, "model_output": [{"sum_logits": -1.8526878356933594, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.8526878356933594, "logits_per_char": -0.9263439178466797, "bits_per_byte": 1.3364317764360183, "num_chars": 2}, {"sum_logits": -1.2289924621582031, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.2289924621582031, "logits_per_char": -0.6144962310791016, "bits_per_byte": 0.8865306652234153, "num_chars": 2}, {"sum_logits": -1.2093238830566406, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.2093238830566406, "logits_per_char": -0.6046619415283203, "bits_per_byte": 0.8723427844578268, "num_chars": 2}, {"sum_logits": -1.4238243103027344, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4238243103027344, "logits_per_char": -0.7119121551513672, "bits_per_byte": 1.0270721357861896, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 873, "native_id": "Mercury_7004953", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1197657585144043, "logits_per_token_corr": -1.1197657585144043, "logits_per_char_corr": -0.5598828792572021, "bits_per_byte_corr": 0.8077402533835801}, "model_output": [{"sum_logits": -1.5318045616149902, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.5318045616149902, "logits_per_char": -0.7659022808074951, "bits_per_byte": 1.1049634223273133, "num_chars": 2}, {"sum_logits": -1.1197657585144043, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.1197657585144043, "logits_per_char": -0.5598828792572021, "bits_per_byte": 0.8077402533835801, "num_chars": 2}, {"sum_logits": -1.2951502799987793, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.2951502799987793, "logits_per_char": -0.6475751399993896, "bits_per_byte": 0.9342534430807669, "num_chars": 2}, {"sum_logits": -1.7910094261169434, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.7910094261169434, "logits_per_char": -0.8955047130584717, "bits_per_byte": 1.2919402086230793, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 874, "native_id": "TIMSS_2003_8_pg94", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6898893117904663, "logits_per_token_corr": -1.6898893117904663, "logits_per_char_corr": -0.8449446558952332, "bits_per_byte_corr": 1.2189974648865602}, "model_output": [{"sum_logits": -1.4459317922592163, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4459317922592163, "logits_per_char": -0.7229658961296082, "bits_per_byte": 1.0430193130787804, "num_chars": 2}, {"sum_logits": -1.1353639364242554, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.1353639364242554, "logits_per_char": -0.5676819682121277, "bits_per_byte": 0.8189919603423111, "num_chars": 2}, {"sum_logits": -1.4541410207748413, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4541410207748413, "logits_per_char": -0.7270705103874207, "bits_per_byte": 1.048941019713293, "num_chars": 2}, {"sum_logits": -1.6898893117904663, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.6898893117904663, "logits_per_char": -0.8449446558952332, "bits_per_byte": 1.2189974648865602, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 875, "native_id": "Mercury_7095060", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1945815086364746, "logits_per_token_corr": -1.1945815086364746, "logits_per_char_corr": -0.5972907543182373, "bits_per_byte_corr": 0.8617084092243682}, "model_output": [{"sum_logits": -1.1945815086364746, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.1945815086364746, "logits_per_char": -0.5972907543182373, "bits_per_byte": 0.8617084092243682, "num_chars": 2}, {"sum_logits": -1.1841216087341309, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.1841216087341309, "logits_per_char": -0.5920608043670654, "bits_per_byte": 0.8541631863657096, "num_chars": 2}, {"sum_logits": -1.4958300590515137, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4958300590515137, "logits_per_char": -0.7479150295257568, "bits_per_byte": 1.0790133041039072, "num_chars": 2}, {"sum_logits": -1.8942046165466309, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.8942046165466309, "logits_per_char": -0.9471023082733154, "bits_per_byte": 1.366379803361384, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 876, "native_id": "Mercury_7123358", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1348026990890503, "logits_per_token_corr": -1.1348026990890503, "logits_per_char_corr": -0.5674013495445251, "bits_per_byte_corr": 0.8185871131821798}, "model_output": [{"sum_logits": -1.1348026990890503, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.1348026990890503, "logits_per_char": -0.5674013495445251, "bits_per_byte": 0.8185871131821798, "num_chars": 2}, {"sum_logits": -1.1561630964279175, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.1561630964279175, "logits_per_char": -0.5780815482139587, "bits_per_byte": 0.8339953828382917, "num_chars": 2}, {"sum_logits": -1.5908154249191284, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5908154249191284, "logits_per_char": -0.7954077124595642, "bits_per_byte": 1.1475307622510722, "num_chars": 2}, {"sum_logits": -2.038937568664551, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -2.038937568664551, "logits_per_char": -1.0194687843322754, "bits_per_byte": 1.4707825594983306, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 877, "native_id": "Mercury_7069020", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4024534225463867, "logits_per_token_corr": -1.4024534225463867, "logits_per_char_corr": -0.7012267112731934, "bits_per_byte_corr": 1.0116562988934399}, "model_output": [{"sum_logits": -1.4024534225463867, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4024534225463867, "logits_per_char": -0.7012267112731934, "bits_per_byte": 1.0116562988934399, "num_chars": 2}, {"sum_logits": -1.179356575012207, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.179356575012207, "logits_per_char": -0.5896782875061035, "bits_per_byte": 0.8507259411055631, "num_chars": 2}, {"sum_logits": -1.4054327011108398, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4054327011108398, "logits_per_char": -0.7027163505554199, "bits_per_byte": 1.0138053940986231, "num_chars": 2}, {"sum_logits": -1.681593894958496, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.681593894958496, "logits_per_char": -0.840796947479248, "bits_per_byte": 1.2130135865237608, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 878, "native_id": "TIMSS_2003_8_pg117", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3403407335281372, "logits_per_token_corr": -1.3403407335281372, "logits_per_char_corr": -0.6701703667640686, "bits_per_byte_corr": 0.9668514646819542}, "model_output": [{"sum_logits": -1.4976245164871216, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4976245164871216, "logits_per_char": -0.7488122582435608, "bits_per_byte": 1.080307731525627, "num_chars": 2}, {"sum_logits": -1.3403407335281372, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.3403407335281372, "logits_per_char": -0.6701703667640686, "bits_per_byte": 0.9668514646819542, "num_chars": 2}, {"sum_logits": -1.3557482957839966, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3557482957839966, "logits_per_char": -0.6778741478919983, "bits_per_byte": 0.9779656715113203, "num_chars": 2}, {"sum_logits": -1.6130496263504028, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.6130496263504028, "logits_per_char": -0.8065248131752014, "bits_per_byte": 1.1635693483225966, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 879, "native_id": "VASoL_2008_3_32", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1600587368011475, "logits_per_token_corr": -1.1600587368011475, "logits_per_char_corr": -0.5800293684005737, "bits_per_byte_corr": 0.8368054933620666}, "model_output": [{"sum_logits": -1.2560288906097412, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.2560288906097412, "logits_per_char": -0.6280144453048706, "bits_per_byte": 0.906033325848621, "num_chars": 2}, {"sum_logits": -1.1600587368011475, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.1600587368011475, "logits_per_char": -0.5800293684005737, "bits_per_byte": 0.8368054933620666, "num_chars": 2}, {"sum_logits": -1.3756749629974365, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3756749629974365, "logits_per_char": -0.6878374814987183, "bits_per_byte": 0.9923397234964679, "num_chars": 2}, {"sum_logits": -2.079293966293335, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -2.079293966293335, "logits_per_char": -1.0396469831466675, "bits_per_byte": 1.4998935468619465, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 880, "native_id": "Mercury_SC_400142", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.280932068824768, "logits_per_token_corr": -1.280932068824768, "logits_per_char_corr": -0.640466034412384, "bits_per_byte_corr": 0.9239971717052304}, "model_output": [{"sum_logits": -1.280932068824768, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.280932068824768, "logits_per_char": -0.640466034412384, "bits_per_byte": 0.9239971717052304, "num_chars": 2}, {"sum_logits": -1.2065798044204712, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2065798044204712, "logits_per_char": -0.6032899022102356, "bits_per_byte": 0.8703633501377199, "num_chars": 2}, {"sum_logits": -1.4333826303482056, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4333826303482056, "logits_per_char": -0.7166913151741028, "bits_per_byte": 1.03396700625061, "num_chars": 2}, {"sum_logits": -1.7469106912612915, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.7469106912612915, "logits_per_char": -0.8734553456306458, "bits_per_byte": 1.2601296955801935, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 881, "native_id": "Mercury_7163818", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.726668357849121, "logits_per_token_corr": -1.726668357849121, "logits_per_char_corr": -0.8633341789245605, "bits_per_byte_corr": 1.2455279385653033}, "model_output": [{"sum_logits": -1.2349328994750977, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.2349328994750977, "logits_per_char": -0.6174664497375488, "bits_per_byte": 0.8908157849523161, "num_chars": 2}, {"sum_logits": -1.3443021774291992, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3443021774291992, "logits_per_char": -0.6721510887145996, "bits_per_byte": 0.9697090424173672, "num_chars": 2}, {"sum_logits": -1.385697364807129, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.385697364807129, "logits_per_char": -0.6928486824035645, "bits_per_byte": 0.999569358190793, "num_chars": 2}, {"sum_logits": -1.726668357849121, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.726668357849121, "logits_per_char": -0.8633341789245605, "bits_per_byte": 1.2455279385653033, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 882, "native_id": "Mercury_402502", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.82890784740448, "logits_per_token_corr": -1.82890784740448, "logits_per_char_corr": -0.91445392370224, "bits_per_byte_corr": 1.319278140847624}, "model_output": [{"sum_logits": -1.2854660749435425, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.2854660749435425, "logits_per_char": -0.6427330374717712, "bits_per_byte": 0.9272677657766908, "num_chars": 2}, {"sum_logits": -1.401772379875183, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.401772379875183, "logits_per_char": -0.7008861899375916, "bits_per_byte": 1.0111650304512498, "num_chars": 2}, {"sum_logits": -1.5289467573165894, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.5289467573165894, "logits_per_char": -0.7644733786582947, "bits_per_byte": 1.1029019522827448, "num_chars": 2}, {"sum_logits": -1.82890784740448, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.82890784740448, "logits_per_char": -0.91445392370224, "bits_per_byte": 1.319278140847624, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 883, "native_id": "Mercury_7130778", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3600249290466309, "logits_per_token_corr": -1.3600249290466309, "logits_per_char_corr": -0.6800124645233154, "bits_per_byte_corr": 0.9810506103111741}, "model_output": [{"sum_logits": -1.627927303314209, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.627927303314209, "logits_per_char": -0.8139636516571045, "bits_per_byte": 1.17430132371042, "num_chars": 2}, {"sum_logits": -1.2490382194519043, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.2490382194519043, "logits_per_char": -0.6245191097259521, "bits_per_byte": 0.9009906225426688, "num_chars": 2}, {"sum_logits": -1.3600249290466309, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3600249290466309, "logits_per_char": -0.6800124645233154, "bits_per_byte": 0.9810506103111741, "num_chars": 2}, {"sum_logits": -1.4207167625427246, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4207167625427246, "logits_per_char": -0.7103583812713623, "bits_per_byte": 1.0248305139148421, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 884, "native_id": "MEA_2010_8_18", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4109301567077637, "logits_per_token_corr": -1.4109301567077637, "logits_per_char_corr": -0.7054650783538818, "bits_per_byte_corr": 1.0177709700622206}, "model_output": [{"sum_logits": -1.2465739250183105, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.2465739250183105, "logits_per_char": -0.6232869625091553, "bits_per_byte": 0.8992130098633496, "num_chars": 2}, {"sum_logits": -1.0829482078552246, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.0829482078552246, "logits_per_char": -0.5414741039276123, "bits_per_byte": 0.7811820045067227, "num_chars": 2}, {"sum_logits": -1.4109301567077637, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4109301567077637, "logits_per_char": -0.7054650783538818, "bits_per_byte": 1.0177709700622206, "num_chars": 2}, {"sum_logits": -2.116239070892334, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -2.116239070892334, "logits_per_char": -1.058119535446167, "bits_per_byte": 1.5265438064570156, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 885, "native_id": "Mercury_7211033", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3530528545379639, "logits_per_token_corr": -1.3530528545379639, "logits_per_char_corr": -0.6765264272689819, "bits_per_byte_corr": 0.9760213216519894}, "model_output": [{"sum_logits": -1.3214385509490967, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.3214385509490967, "logits_per_char": -0.6607192754745483, "bits_per_byte": 0.9532164221475646, "num_chars": 2}, {"sum_logits": -1.3530528545379639, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3530528545379639, "logits_per_char": -0.6765264272689819, "bits_per_byte": 0.9760213216519894, "num_chars": 2}, {"sum_logits": -1.4026896953582764, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4026896953582764, "logits_per_char": -0.7013448476791382, "bits_per_byte": 1.011826733700445, "num_chars": 2}, {"sum_logits": -1.5515544414520264, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.5515544414520264, "logits_per_char": -0.7757772207260132, "bits_per_byte": 1.1192099491768461, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 886, "native_id": "NYSEDREGENTS_2008_8_17", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1637392044067383, "logits_per_token_corr": -1.1637392044067383, "logits_per_char_corr": -0.5818696022033691, "bits_per_byte_corr": 0.8394603895434376}, "model_output": [{"sum_logits": -1.1637392044067383, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.1637392044067383, "logits_per_char": -0.5818696022033691, "bits_per_byte": 0.8394603895434376, "num_chars": 2}, {"sum_logits": -1.1775712966918945, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.1775712966918945, "logits_per_char": -0.5887856483459473, "bits_per_byte": 0.8494381350159015, "num_chars": 2}, {"sum_logits": -1.4248132705688477, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4248132705688477, "logits_per_char": -0.7124066352844238, "bits_per_byte": 1.027785519821969, "num_chars": 2}, {"sum_logits": -2.047116279602051, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -2.047116279602051, "logits_per_char": -1.0235581398010254, "bits_per_byte": 1.4766822523535326, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 887, "native_id": "NAEP_2005_8_S11+1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.230506420135498, "logits_per_token_corr": -1.230506420135498, "logits_per_char_corr": -0.615253210067749, "bits_per_byte_corr": 0.8876227550563949}, "model_output": [{"sum_logits": -1.5613303184509277, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.5613303184509277, "logits_per_char": -0.7806651592254639, "bits_per_byte": 1.126261753810179, "num_chars": 2}, {"sum_logits": -1.5124125480651855, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.5124125480651855, "logits_per_char": -0.7562062740325928, "bits_per_byte": 1.0909750414367259, "num_chars": 2}, {"sum_logits": -1.230506420135498, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.230506420135498, "logits_per_char": -0.615253210067749, "bits_per_byte": 0.8876227550563949, "num_chars": 2}, {"sum_logits": -1.3492445945739746, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.3492445945739746, "logits_per_char": -0.6746222972869873, "bits_per_byte": 0.9732742427697558, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 888, "native_id": "Mercury_412774", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3305665254592896, "logits_per_token_corr": -1.3305665254592896, "logits_per_char_corr": -0.6652832627296448, "bits_per_byte_corr": 0.9598008639271775}, "model_output": [{"sum_logits": -1.4401379823684692, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.4401379823684692, "logits_per_char": -0.7200689911842346, "bits_per_byte": 1.0388399626801603, "num_chars": 2}, {"sum_logits": -1.2702447175979614, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": true, "logits_per_token": -1.2702447175979614, "logits_per_char": -0.6351223587989807, "bits_per_byte": 0.9162878773976486, "num_chars": 2}, {"sum_logits": -1.3305665254592896, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.3305665254592896, "logits_per_char": -0.6652832627296448, "bits_per_byte": 0.9598008639271775, "num_chars": 2}, {"sum_logits": -1.6222668886184692, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.6222668886184692, "logits_per_char": -0.8111334443092346, "bits_per_byte": 1.1702181976049577, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 889, "native_id": "MEA_2013_5_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.283211350440979, "logits_per_token_corr": -1.283211350440979, "logits_per_char_corr": -0.6416056752204895, "bits_per_byte_corr": 0.92564132584748}, "model_output": [{"sum_logits": -1.6212507486343384, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -1.6212507486343384, "logits_per_char": -0.8106253743171692, "bits_per_byte": 1.16948520754698, "num_chars": 2}, {"sum_logits": -0.9063688516616821, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": true, "logits_per_token": -0.9063688516616821, "logits_per_char": -0.45318442583084106, "bits_per_byte": 0.6538069237547364, "num_chars": 2}, {"sum_logits": -1.283211350440979, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -1.283211350440979, "logits_per_char": -0.6416056752204895, "bits_per_byte": 0.92564132584748, "num_chars": 2}, {"sum_logits": -2.203979969024658, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -2.203979969024658, "logits_per_char": -1.101989984512329, "bits_per_byte": 1.589835485766385, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 890, "native_id": "Mercury_7098473", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.389485478401184, "logits_per_token_corr": -1.389485478401184, "logits_per_char_corr": -0.694742739200592, "bits_per_byte_corr": 1.0023019045390287}, "model_output": [{"sum_logits": -1.26978600025177, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.26978600025177, "logits_per_char": -0.634893000125885, "bits_per_byte": 0.9159569827773884, "num_chars": 2}, {"sum_logits": -1.389485478401184, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.389485478401184, "logits_per_char": -0.694742739200592, "bits_per_byte": 1.0023019045390287, "num_chars": 2}, {"sum_logits": -1.2945300340652466, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.2945300340652466, "logits_per_char": -0.6472650170326233, "bits_per_byte": 0.9338060302145469, "num_chars": 2}, {"sum_logits": -1.6906520128250122, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.6906520128250122, "logits_per_char": -0.8453260064125061, "bits_per_byte": 1.2195476373866707, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 891, "native_id": "Mercury_417593", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2300891876220703, "logits_per_token_corr": -1.2300891876220703, "logits_per_char_corr": -0.6150445938110352, "bits_per_byte_corr": 0.8873217854173847}, "model_output": [{"sum_logits": -1.2300891876220703, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.2300891876220703, "logits_per_char": -0.6150445938110352, "bits_per_byte": 0.8873217854173847, "num_chars": 2}, {"sum_logits": -1.1123485565185547, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": true, "logits_per_token": -1.1123485565185547, "logits_per_char": -0.5561742782592773, "bits_per_byte": 0.8023898731152344, "num_chars": 2}, {"sum_logits": -1.4279975891113281, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.4279975891113281, "logits_per_char": -0.7139987945556641, "bits_per_byte": 1.0300825201068944, "num_chars": 2}, {"sum_logits": -2.039796829223633, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -2.039796829223633, "logits_per_char": -1.0198984146118164, "bits_per_byte": 1.4714023849720406, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 892, "native_id": "Mercury_7081743", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0524927377700806, "logits_per_token_corr": -1.0524927377700806, "logits_per_char_corr": -0.5262463688850403, "bits_per_byte_corr": 0.7592130266768171}, "model_output": [{"sum_logits": -1.4062448740005493, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4062448740005493, "logits_per_char": -0.7031224370002747, "bits_per_byte": 1.0143912529987875, "num_chars": 2}, {"sum_logits": -1.0524927377700806, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.0524927377700806, "logits_per_char": -0.5262463688850403, "bits_per_byte": 0.7592130266768171, "num_chars": 2}, {"sum_logits": -1.4235941171646118, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4235941171646118, "logits_per_char": -0.7117970585823059, "bits_per_byte": 1.0269060865367814, "num_chars": 2}, {"sum_logits": -1.8701120615005493, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.8701120615005493, "logits_per_char": -0.9350560307502747, "bits_per_byte": 1.3490006985177085, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 893, "native_id": "Mercury_7018410", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2401759624481201, "logits_per_token_corr": -1.2401759624481201, "logits_per_char_corr": -0.6200879812240601, "bits_per_byte_corr": 0.8945978554274429}, "model_output": [{"sum_logits": -1.3640735149383545, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3640735149383545, "logits_per_char": -0.6820367574691772, "bits_per_byte": 0.9839710477054776, "num_chars": 2}, {"sum_logits": -1.2980601787567139, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.2980601787567139, "logits_per_char": -0.6490300893783569, "bits_per_byte": 0.9363524913345489, "num_chars": 2}, {"sum_logits": -1.2401759624481201, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.2401759624481201, "logits_per_char": -0.6200879812240601, "bits_per_byte": 0.8945978554274429, "num_chars": 2}, {"sum_logits": -1.7562663555145264, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.7562663555145264, "logits_per_char": -0.8781331777572632, "bits_per_byte": 1.2668783807913804, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 894, "native_id": "Mercury_402563", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7115558385849, "logits_per_token_corr": -1.7115558385849, "logits_per_char_corr": -0.85577791929245, "bits_per_byte_corr": 1.2346265602663802}, "model_output": [{"sum_logits": -1.3746284246444702, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3746284246444702, "logits_per_char": -0.6873142123222351, "bits_per_byte": 0.991584805650505, "num_chars": 2}, {"sum_logits": -1.7115558385849, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.7115558385849, "logits_per_char": -0.85577791929245, "bits_per_byte": 1.2346265602663802, "num_chars": 2}, {"sum_logits": -1.2091048955917358, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.2091048955917358, "logits_per_char": -0.6045524477958679, "bits_per_byte": 0.8721848183930092, "num_chars": 2}, {"sum_logits": -1.717342734336853, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.717342734336853, "logits_per_char": -0.8586713671684265, "bits_per_byte": 1.2388009231681252, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 895, "native_id": "Mercury_416407", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3366172313690186, "logits_per_token_corr": -1.3366172313690186, "logits_per_char_corr": -0.6683086156845093, "bits_per_byte_corr": 0.9641655256321023}, "model_output": [{"sum_logits": -1.4499576091766357, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4499576091766357, "logits_per_char": -0.7249788045883179, "bits_per_byte": 1.0459233261299266, "num_chars": 2}, {"sum_logits": -1.279057264328003, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.279057264328003, "logits_per_char": -0.6395286321640015, "bits_per_byte": 0.9226447861301698, "num_chars": 2}, {"sum_logits": -1.3366172313690186, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3366172313690186, "logits_per_char": -0.6683086156845093, "bits_per_byte": 0.9641655256321023, "num_chars": 2}, {"sum_logits": -1.5634944438934326, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.5634944438934326, "logits_per_char": -0.7817472219467163, "bits_per_byte": 1.1278228403320618, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 896, "native_id": "Mercury_SC_400400", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2877912521362305, "logits_per_token_corr": -1.2877912521362305, "logits_per_char_corr": -0.6438956260681152, "bits_per_byte_corr": 0.9289450265792316}, "model_output": [{"sum_logits": -1.2877912521362305, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.2877912521362305, "logits_per_char": -0.6438956260681152, "bits_per_byte": 0.9289450265792316, "num_chars": 2}, {"sum_logits": -1.3758916854858398, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3758916854858398, "logits_per_char": -0.6879458427429199, "bits_per_byte": 0.9924960557261023, "num_chars": 2}, {"sum_logits": -1.3740606307983398, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3740606307983398, "logits_per_char": -0.6870303153991699, "bits_per_byte": 0.991175228967475, "num_chars": 2}, {"sum_logits": -1.6557073593139648, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.6557073593139648, "logits_per_char": -0.8278536796569824, "bits_per_byte": 1.194340398223667, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 897, "native_id": "MCAS_2000_8_22", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.228297233581543, "logits_per_token_corr": -2.228297233581543, "logits_per_char_corr": -1.1141486167907715, "bits_per_byte_corr": 1.607376684258499}, "model_output": [{"sum_logits": -1.2134218215942383, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.2134218215942383, "logits_per_char": -0.6067109107971191, "bits_per_byte": 0.8752988222608589, "num_chars": 2}, {"sum_logits": -1.0862398147583008, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": true, "logits_per_token": -1.0862398147583008, "logits_per_char": -0.5431199073791504, "bits_per_byte": 0.7835563969845363, "num_chars": 2}, {"sum_logits": -1.3999605178833008, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.3999605178833008, "logits_per_char": -0.6999802589416504, "bits_per_byte": 1.009858048296017, "num_chars": 2}, {"sum_logits": -2.228297233581543, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -2.228297233581543, "logits_per_char": -1.1141486167907715, "bits_per_byte": 1.607376684258499, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 898, "native_id": "MCAS_8_2014_8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3538367748260498, "logits_per_token_corr": -1.3538367748260498, "logits_per_char_corr": -0.6769183874130249, "bits_per_byte_corr": 0.9765868006080267}, "model_output": [{"sum_logits": -1.3538367748260498, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3538367748260498, "logits_per_char": -0.6769183874130249, "bits_per_byte": 0.9765868006080267, "num_chars": 2}, {"sum_logits": -1.1803710460662842, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.1803710460662842, "logits_per_char": -0.5901855230331421, "bits_per_byte": 0.8514577272849849, "num_chars": 2}, {"sum_logits": -1.4019877910614014, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4019877910614014, "logits_per_char": -0.7009938955307007, "bits_per_byte": 1.0113204167763046, "num_chars": 2}, {"sum_logits": -1.7305629253387451, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.7305629253387451, "logits_per_char": -0.8652814626693726, "bits_per_byte": 1.2483372751671493, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 899, "native_id": "Mercury_7206430", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2925211191177368, "logits_per_token_corr": -1.2925211191177368, "logits_per_char_corr": -0.6462605595588684, "bits_per_byte_corr": 0.9323569043983758}, "model_output": [{"sum_logits": -1.2925211191177368, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": true, "logits_per_token": -1.2925211191177368, "logits_per_char": -0.6462605595588684, "bits_per_byte": 0.9323569043983758, "num_chars": 2}, {"sum_logits": -1.3140512704849243, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.3140512704849243, "logits_per_char": -0.6570256352424622, "bits_per_byte": 0.9478876257019021, "num_chars": 2}, {"sum_logits": -1.3015066385269165, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.3015066385269165, "logits_per_char": -0.6507533192634583, "bits_per_byte": 0.938838586544098, "num_chars": 2}, {"sum_logits": -1.7497965097427368, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.7497965097427368, "logits_per_char": -0.8748982548713684, "bits_per_byte": 1.2622113735862384, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 900, "native_id": "Mercury_7185343", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7490859031677246, "logits_per_token_corr": -1.7490859031677246, "logits_per_char_corr": -0.8745429515838623, "bits_per_byte_corr": 1.2616987792953414}, "model_output": [{"sum_logits": -1.5898890495300293, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.5898890495300293, "logits_per_char": -0.7949445247650146, "bits_per_byte": 1.1468625236611443, "num_chars": 2}, {"sum_logits": -0.9962286353111267, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": true, "logits_per_token": -0.9962286353111267, "logits_per_char": -0.49811431765556335, "bits_per_byte": 0.7186270558779874, "num_chars": 2}, {"sum_logits": -1.4129748344421387, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.4129748344421387, "logits_per_char": -0.7064874172210693, "bits_per_byte": 1.019245893276021, "num_chars": 2}, {"sum_logits": -1.7490859031677246, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.7490859031677246, "logits_per_char": -0.8745429515838623, "bits_per_byte": 1.2616987792953414, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 901, "native_id": "OHAT_2010_8_8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.180480718612671, "logits_per_token_corr": -1.180480718612671, "logits_per_char_corr": -0.5902403593063354, "bits_per_byte_corr": 0.8515368393043818}, "model_output": [{"sum_logits": -1.7139966487884521, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.7139966487884521, "logits_per_char": -0.8569983243942261, "bits_per_byte": 1.2363872326545893, "num_chars": 2}, {"sum_logits": -1.180480718612671, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.180480718612671, "logits_per_char": -0.5902403593063354, "bits_per_byte": 0.8515368393043818, "num_chars": 2}, {"sum_logits": -1.2217442989349365, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.2217442989349365, "logits_per_char": -0.6108721494674683, "bits_per_byte": 0.8813022206545312, "num_chars": 2}, {"sum_logits": -1.5794141292572021, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.5794141292572021, "logits_per_char": -0.7897070646286011, "bits_per_byte": 1.1393064658954815, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 902, "native_id": "Mercury_405462", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.9142506718635559, "logits_per_token_corr": -0.9142506718635559, "logits_per_char_corr": -0.45712533593177795, "bits_per_byte_corr": 0.6594924552139513}, "model_output": [{"sum_logits": -1.304887056350708, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.304887056350708, "logits_per_char": -0.652443528175354, "bits_per_byte": 0.9412770425593582, "num_chars": 2}, {"sum_logits": -0.9142506718635559, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": true, "logits_per_token": -0.9142506718635559, "logits_per_char": -0.45712533593177795, "bits_per_byte": 0.6594924552139513, "num_chars": 2}, {"sum_logits": -1.6077988147735596, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.6077988147735596, "logits_per_char": -0.8038994073867798, "bits_per_byte": 1.159781688411317, "num_chars": 2}, {"sum_logits": -2.1244518756866455, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -2.1244518756866455, "logits_per_char": -1.0622259378433228, "bits_per_byte": 1.532468092831291, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 903, "native_id": "Mercury_SC_LBS10337", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3009217977523804, "logits_per_token_corr": -1.3009217977523804, "logits_per_char_corr": -0.6504608988761902, "bits_per_byte_corr": 0.9384167131015313}, "model_output": [{"sum_logits": -1.3009217977523804, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3009217977523804, "logits_per_char": -0.6504608988761902, "bits_per_byte": 0.9384167131015313, "num_chars": 2}, {"sum_logits": -1.032056212425232, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.032056212425232, "logits_per_char": -0.516028106212616, "bits_per_byte": 0.7444711897927992, "num_chars": 2}, {"sum_logits": -1.5373739004135132, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.5373739004135132, "logits_per_char": -0.7686869502067566, "bits_per_byte": 1.108980851060146, "num_chars": 2}, {"sum_logits": -1.9431604146957397, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.9431604146957397, "logits_per_char": -0.9715802073478699, "bits_per_byte": 1.4016939469676497, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 904, "native_id": "Mercury_7142520", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.305672526359558, "logits_per_token_corr": -1.305672526359558, "logits_per_char_corr": -0.652836263179779, "bits_per_byte_corr": 0.941843639402626}, "model_output": [{"sum_logits": -1.385722041130066, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.385722041130066, "logits_per_char": -0.692861020565033, "bits_per_byte": 0.9995871583951573, "num_chars": 2}, {"sum_logits": -1.2502983808517456, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2502983808517456, "logits_per_char": -0.6251491904258728, "bits_per_byte": 0.9018996368438049, "num_chars": 2}, {"sum_logits": -1.305672526359558, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.305672526359558, "logits_per_char": -0.652836263179779, "bits_per_byte": 0.941843639402626, "num_chars": 2}, {"sum_logits": -1.7383984327316284, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.7383984327316284, "logits_per_char": -0.8691992163658142, "bits_per_byte": 1.2539893989964341, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 905, "native_id": "Mercury_SC_405501", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9846168756484985, "logits_per_token_corr": -1.9846168756484985, "logits_per_char_corr": -0.9923084378242493, "bits_per_byte_corr": 1.4315984622823472}, "model_output": [{"sum_logits": -1.4194446802139282, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.4194446802139282, "logits_per_char": -0.7097223401069641, "bits_per_byte": 1.023912900481163, "num_chars": 2}, {"sum_logits": -1.0453754663467407, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": true, "logits_per_token": -1.0453754663467407, "logits_per_char": -0.5226877331733704, "bits_per_byte": 0.7540790005832569, "num_chars": 2}, {"sum_logits": -1.3612209558486938, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.3612209558486938, "logits_per_char": -0.6806104779243469, "bits_per_byte": 0.981913361279228, "num_chars": 2}, {"sum_logits": -1.9846168756484985, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.9846168756484985, "logits_per_char": -0.9923084378242493, "bits_per_byte": 1.4315984622823472, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 906, "native_id": "Mercury_7009555", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4554190635681152, "logits_per_token_corr": -1.4554190635681152, "logits_per_char_corr": -0.7277095317840576, "bits_per_byte_corr": 1.0498629327132436}, "model_output": [{"sum_logits": -1.3336081504821777, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3336081504821777, "logits_per_char": -0.6668040752410889, "bits_per_byte": 0.9619949325955612, "num_chars": 2}, {"sum_logits": -1.2296862602233887, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.2296862602233887, "logits_per_char": -0.6148431301116943, "bits_per_byte": 0.8870311347374263, "num_chars": 2}, {"sum_logits": -1.4554190635681152, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4554190635681152, "logits_per_char": -0.7277095317840576, "bits_per_byte": 1.0498629327132436, "num_chars": 2}, {"sum_logits": -1.6254096031188965, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.6254096031188965, "logits_per_char": -0.8127048015594482, "bits_per_byte": 1.1724851869173074, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 907, "native_id": "Mercury_409085", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7670787572860718, "logits_per_token_corr": -1.7670787572860718, "logits_per_char_corr": -0.8835393786430359, "bits_per_byte_corr": 1.2746778799993397}, "model_output": [{"sum_logits": -1.7670787572860718, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.7670787572860718, "logits_per_char": -0.8835393786430359, "bits_per_byte": 1.2746778799993397, "num_chars": 2}, {"sum_logits": -1.6651443243026733, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.6651443243026733, "logits_per_char": -0.8325721621513367, "bits_per_byte": 1.2011477295187982, "num_chars": 2}, {"sum_logits": -1.0604785680770874, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.0604785680770874, "logits_per_char": -0.5302392840385437, "bits_per_byte": 0.7649735855674711, "num_chars": 2}, {"sum_logits": -1.4336131811141968, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4336131811141968, "logits_per_char": -0.7168065905570984, "bits_per_byte": 1.0341333134739945, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 908, "native_id": "NYSEDREGENTS_2012_4_2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2458622455596924, "logits_per_token_corr": -1.2458622455596924, "logits_per_char_corr": -0.6229311227798462, "bits_per_byte_corr": 0.8986996416505237}, "model_output": [{"sum_logits": -1.2458622455596924, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.2458622455596924, "logits_per_char": -0.6229311227798462, "bits_per_byte": 0.8986996416505237, "num_chars": 2}, {"sum_logits": -1.3085529804229736, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3085529804229736, "logits_per_char": -0.6542764902114868, "bits_per_byte": 0.9439214477990265, "num_chars": 2}, {"sum_logits": -1.402686357498169, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.402686357498169, "logits_per_char": -0.7013431787490845, "bits_per_byte": 1.0118243259433328, "num_chars": 2}, {"sum_logits": -1.7324135303497314, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.7324135303497314, "logits_per_char": -0.8662067651748657, "bits_per_byte": 1.2496722045031474, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 909, "native_id": "Mercury_407539", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9595634937286377, "logits_per_token_corr": -1.9595634937286377, "logits_per_char_corr": -0.9797817468643188, "bits_per_byte_corr": 1.413526267355694}, "model_output": [{"sum_logits": -1.3472168445587158, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.3472168445587158, "logits_per_char": -0.6736084222793579, "bits_per_byte": 0.9718115303241666, "num_chars": 2}, {"sum_logits": -1.0288002490997314, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": true, "logits_per_token": -1.0288002490997314, "logits_per_char": -0.5144001245498657, "bits_per_byte": 0.7421225087212896, "num_chars": 2}, {"sum_logits": -1.4841892719268799, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.4841892719268799, "logits_per_char": -0.7420946359634399, "bits_per_byte": 1.0706162511755246, "num_chars": 2}, {"sum_logits": -1.9595634937286377, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.9595634937286377, "logits_per_char": -0.9797817468643188, "bits_per_byte": 1.413526267355694, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 910, "native_id": "ACTAAP_2013_7_16", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7621294260025024, "logits_per_token_corr": -1.7621294260025024, "logits_per_char_corr": -0.8810647130012512, "bits_per_byte_corr": 1.2711076921500761}, "model_output": [{"sum_logits": -1.3722139596939087, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.3722139596939087, "logits_per_char": -0.6861069798469543, "bits_per_byte": 0.9898431373452161, "num_chars": 2}, {"sum_logits": -1.090269684791565, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": true, "logits_per_token": -1.090269684791565, "logits_per_char": -0.5451348423957825, "bits_per_byte": 0.7864633337407471, "num_chars": 2}, {"sum_logits": -1.5348936319351196, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.5348936319351196, "logits_per_char": -0.7674468159675598, "bits_per_byte": 1.107191715543219, "num_chars": 2}, {"sum_logits": -1.7621294260025024, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.7621294260025024, "logits_per_char": -0.8810647130012512, "bits_per_byte": 1.2711076921500761, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 911, "native_id": "AKDE&ED_2008_8_34", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.694522500038147, "logits_per_token_corr": -1.694522500038147, "logits_per_char_corr": -0.8472612500190735, "bits_per_byte_corr": 1.2223396037407797}, "model_output": [{"sum_logits": -1.3544899225234985, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3544899225234985, "logits_per_char": -0.6772449612617493, "bits_per_byte": 0.9770579470800658, "num_chars": 2}, {"sum_logits": -1.243472695350647, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.243472695350647, "logits_per_char": -0.6217363476753235, "bits_per_byte": 0.89697594553225, "num_chars": 2}, {"sum_logits": -1.3529068231582642, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3529068231582642, "logits_per_char": -0.6764534115791321, "bits_per_byte": 0.9759159822783359, "num_chars": 2}, {"sum_logits": -1.694522500038147, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.694522500038147, "logits_per_char": -0.8472612500190735, "bits_per_byte": 1.2223396037407797, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 912, "native_id": "MCAS_2004_8_3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3488203287124634, "logits_per_token_corr": -1.3488203287124634, "logits_per_char_corr": -0.6744101643562317, "bits_per_byte_corr": 0.9729681996425452}, "model_output": [{"sum_logits": -1.3488203287124634, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3488203287124634, "logits_per_char": -0.6744101643562317, "bits_per_byte": 0.9729681996425452, "num_chars": 2}, {"sum_logits": -1.190672516822815, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.190672516822815, "logits_per_char": -0.5953362584114075, "bits_per_byte": 0.858888667672145, "num_chars": 2}, {"sum_logits": -1.5470796823501587, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5470796823501587, "logits_per_char": -0.7735398411750793, "bits_per_byte": 1.115982092794125, "num_chars": 2}, {"sum_logits": -1.60616934299469, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.60616934299469, "logits_per_char": -0.803084671497345, "bits_per_byte": 1.1586062729839943, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 913, "native_id": "Mercury_415272", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1259331703186035, "logits_per_token_corr": -1.1259331703186035, "logits_per_char_corr": -0.5629665851593018, "bits_per_byte_corr": 0.8121891005961025}, "model_output": [{"sum_logits": -1.8902153968811035, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.8902153968811035, "logits_per_char": -0.9451076984405518, "bits_per_byte": 1.3635021896471455, "num_chars": 2}, {"sum_logits": -1.1259331703186035, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.1259331703186035, "logits_per_char": -0.5629665851593018, "bits_per_byte": 0.8121891005961025, "num_chars": 2}, {"sum_logits": -1.5132718086242676, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.5132718086242676, "logits_per_char": -0.7566359043121338, "bits_per_byte": 1.0915948669104358, "num_chars": 2}, {"sum_logits": -1.3371758460998535, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3371758460998535, "logits_per_char": -0.6685879230499268, "bits_per_byte": 0.9645684809830742, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 914, "native_id": "Mercury_405387", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.256664514541626, "logits_per_token_corr": -1.256664514541626, "logits_per_char_corr": -0.628332257270813, "bits_per_byte_corr": 0.9064918315958215}, "model_output": [{"sum_logits": -1.704397439956665, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.704397439956665, "logits_per_char": -0.8521987199783325, "bits_per_byte": 1.229462867165546, "num_chars": 2}, {"sum_logits": -1.256664514541626, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.256664514541626, "logits_per_char": -0.628332257270813, "bits_per_byte": 0.9064918315958215, "num_chars": 2}, {"sum_logits": -1.1464979648590088, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.1464979648590088, "logits_per_char": -0.5732489824295044, "bits_per_byte": 0.8270234641462847, "num_chars": 2}, {"sum_logits": -1.6439287662506104, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.6439287662506104, "logits_per_char": -0.8219643831253052, "bits_per_byte": 1.1858439393230857, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 915, "native_id": "Mercury_7116323", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.236640214920044, "logits_per_token_corr": -1.236640214920044, "logits_per_char_corr": -0.618320107460022, "bits_per_byte_corr": 0.8920473527151456}, "model_output": [{"sum_logits": -1.7101013660430908, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.7101013660430908, "logits_per_char": -0.8550506830215454, "bits_per_byte": 1.233577380104791, "num_chars": 2}, {"sum_logits": -1.4334461688995361, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4334461688995361, "logits_per_char": -0.7167230844497681, "bits_per_byte": 1.034012839627065, "num_chars": 2}, {"sum_logits": -1.2848389148712158, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.2848389148712158, "logits_per_char": -0.6424194574356079, "bits_per_byte": 0.9268153654135959, "num_chars": 2}, {"sum_logits": -1.236640214920044, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.236640214920044, "logits_per_char": -0.618320107460022, "bits_per_byte": 0.8920473527151456, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 916, "native_id": "Mercury_7213430", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.116429328918457, "logits_per_token_corr": -1.116429328918457, "logits_per_char_corr": -0.5582146644592285, "bits_per_byte_corr": 0.8053335281674042}, "model_output": [{"sum_logits": -1.2443361282348633, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.2443361282348633, "logits_per_char": -0.6221680641174316, "bits_per_byte": 0.8975987807023501, "num_chars": 2}, {"sum_logits": -1.116429328918457, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.116429328918457, "logits_per_char": -0.5582146644592285, "bits_per_byte": 0.8053335281674042, "num_chars": 2}, {"sum_logits": -1.3600893020629883, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3600893020629883, "logits_per_char": -0.6800446510314941, "bits_per_byte": 0.9810970456269071, "num_chars": 2}, {"sum_logits": -2.2230939865112305, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -2.2230939865112305, "logits_per_char": -1.1115469932556152, "bits_per_byte": 1.6036233348860662, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 917, "native_id": "Mercury_7234360", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.510286569595337, "logits_per_token_corr": -1.510286569595337, "logits_per_char_corr": -0.7551432847976685, "bits_per_byte_corr": 1.0894414721389811}, "model_output": [{"sum_logits": -1.510286569595337, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.510286569595337, "logits_per_char": -0.7551432847976685, "bits_per_byte": 1.0894414721389811, "num_chars": 2}, {"sum_logits": -0.9494096636772156, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -0.9494096636772156, "logits_per_char": -0.4747048318386078, "bits_per_byte": 0.6848543067800807, "num_chars": 2}, {"sum_logits": -1.3867590427398682, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3867590427398682, "logits_per_char": -0.6933795213699341, "bits_per_byte": 1.0003351969350855, "num_chars": 2}, {"sum_logits": -1.99806809425354, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.99806809425354, "logits_per_char": -0.99903404712677, "bits_per_byte": 1.4413014654700576, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 918, "native_id": "Mercury_405685", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8946540355682373, "logits_per_token_corr": -1.8946540355682373, "logits_per_char_corr": -0.9473270177841187, "bits_per_byte_corr": 1.3667039906582608}, "model_output": [{"sum_logits": -1.1342380046844482, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.1342380046844482, "logits_per_char": -0.5671190023422241, "bits_per_byte": 0.8181797722736109, "num_chars": 2}, {"sum_logits": -1.2712962627410889, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.2712962627410889, "logits_per_char": -0.6356481313705444, "bits_per_byte": 0.9170464068792796, "num_chars": 2}, {"sum_logits": -1.4524848461151123, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4524848461151123, "logits_per_char": -0.7262424230575562, "bits_per_byte": 1.0477463422290736, "num_chars": 2}, {"sum_logits": -1.8946540355682373, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.8946540355682373, "logits_per_char": -0.9473270177841187, "bits_per_byte": 1.3667039906582608, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 919, "native_id": "Mercury_7236740", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2939893007278442, "logits_per_token_corr": -1.2939893007278442, "logits_per_char_corr": -0.6469946503639221, "bits_per_byte_corr": 0.9334159735623898}, "model_output": [{"sum_logits": -1.465673565864563, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.465673565864563, "logits_per_char": -0.7328367829322815, "bits_per_byte": 1.0572599925181838, "num_chars": 2}, {"sum_logits": -1.2939893007278442, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.2939893007278442, "logits_per_char": -0.6469946503639221, "bits_per_byte": 0.9334159735623898, "num_chars": 2}, {"sum_logits": -1.4816476106643677, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4816476106643677, "logits_per_char": -0.7408238053321838, "bits_per_byte": 1.0687828301260003, "num_chars": 2}, {"sum_logits": -1.5149422883987427, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.5149422883987427, "logits_per_char": -0.7574711441993713, "bits_per_byte": 1.092799863353707, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 920, "native_id": "Mercury_7116235", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2340182065963745, "logits_per_token_corr": -1.2340182065963745, "logits_per_char_corr": -0.6170091032981873, "bits_per_byte_corr": 0.8901559735122805}, "model_output": [{"sum_logits": -1.2340182065963745, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.2340182065963745, "logits_per_char": -0.6170091032981873, "bits_per_byte": 0.8901559735122805, "num_chars": 2}, {"sum_logits": -1.1757696866989136, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.1757696866989136, "logits_per_char": -0.5878848433494568, "bits_per_byte": 0.8481385481146557, "num_chars": 2}, {"sum_logits": -1.4456270933151245, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4456270933151245, "logits_per_char": -0.7228135466575623, "bits_per_byte": 1.0427995192509776, "num_chars": 2}, {"sum_logits": -1.8593004941940308, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.8593004941940308, "logits_per_char": -0.9296502470970154, "bits_per_byte": 1.3412018012490272, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 921, "native_id": "Mercury_SC_405357", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.261278748512268, "logits_per_token_corr": -1.261278748512268, "logits_per_char_corr": -0.630639374256134, "bits_per_byte_corr": 0.9098202978292972}, "model_output": [{"sum_logits": -1.593178391456604, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.593178391456604, "logits_per_char": -0.796589195728302, "bits_per_byte": 1.1492352823037748, "num_chars": 2}, {"sum_logits": -1.0173898935317993, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.0173898935317993, "logits_per_char": -0.5086949467658997, "bits_per_byte": 0.7338916770249659, "num_chars": 2}, {"sum_logits": -1.261278748512268, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.261278748512268, "logits_per_char": -0.630639374256134, "bits_per_byte": 0.9098202978292972, "num_chars": 2}, {"sum_logits": -1.95803701877594, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.95803701877594, "logits_per_char": -0.97901850938797, "bits_per_byte": 1.4124251484335442, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 922, "native_id": "Mercury_7042945", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3289239406585693, "logits_per_token_corr": -1.3289239406585693, "logits_per_char_corr": -0.6644619703292847, "bits_per_byte_corr": 0.9586159894540572}, "model_output": [{"sum_logits": -1.3303086757659912, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3303086757659912, "logits_per_char": -0.6651543378829956, "bits_per_byte": 0.9596148646902691, "num_chars": 2}, {"sum_logits": -1.3289239406585693, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.3289239406585693, "logits_per_char": -0.6644619703292847, "bits_per_byte": 0.9586159894540572, "num_chars": 2}, {"sum_logits": -1.3637712001800537, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3637712001800537, "logits_per_char": -0.6818856000900269, "bits_per_byte": 0.9837529737041834, "num_chars": 2}, {"sum_logits": -1.6734578609466553, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.6734578609466553, "logits_per_char": -0.8367289304733276, "bits_per_byte": 1.2071446785630633, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 923, "native_id": "Mercury_7106750", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.192437171936035, "logits_per_token_corr": -2.192437171936035, "logits_per_char_corr": -1.0962185859680176, "bits_per_byte_corr": 1.581509117707507}, "model_output": [{"sum_logits": -1.1243619918823242, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.1243619918823242, "logits_per_char": -0.5621809959411621, "bits_per_byte": 0.8110557349269157, "num_chars": 2}, {"sum_logits": -0.9932451248168945, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": true, "logits_per_token": -0.9932451248168945, "logits_per_char": -0.49662256240844727, "bits_per_byte": 0.7164749079807513, "num_chars": 2}, {"sum_logits": -1.703629493713379, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.703629493713379, "logits_per_char": -0.8518147468566895, "bits_per_byte": 1.2289089110471165, "num_chars": 2}, {"sum_logits": -2.192437171936035, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -2.192437171936035, "logits_per_char": -1.0962185859680176, "bits_per_byte": 1.581509117707507, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 924, "native_id": "MDSA_2009_4_34", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2344708442687988, "logits_per_token_corr": -1.2344708442687988, "logits_per_char_corr": -0.6172354221343994, "bits_per_byte_corr": 0.8904824825749437}, "model_output": [{"sum_logits": -1.2752861976623535, "num_tokens": 1, "num_tokens_all": 481, "is_greedy": false, "logits_per_token": -1.2752861976623535, "logits_per_char": -0.6376430988311768, "bits_per_byte": 0.9199245365414708, "num_chars": 2}, {"sum_logits": -1.2344708442687988, "num_tokens": 1, "num_tokens_all": 481, "is_greedy": true, "logits_per_token": -1.2344708442687988, "logits_per_char": -0.6172354221343994, "bits_per_byte": 0.8904824825749437, "num_chars": 2}, {"sum_logits": -1.372499942779541, "num_tokens": 1, "num_tokens_all": 481, "is_greedy": false, "logits_per_token": -1.372499942779541, "logits_per_char": -0.6862499713897705, "bits_per_byte": 0.9900494305349262, "num_chars": 2}, {"sum_logits": -1.8975567817687988, "num_tokens": 1, "num_tokens_all": 481, "is_greedy": false, "logits_per_token": -1.8975567817687988, "logits_per_char": -0.9487783908843994, "bits_per_byte": 1.3687978794325169, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 925, "native_id": "Mercury_7016310", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4635944366455078, "logits_per_token_corr": -1.4635944366455078, "logits_per_char_corr": -0.7317972183227539, "bits_per_byte_corr": 1.0557602178113337}, "model_output": [{"sum_logits": -1.3520946502685547, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3520946502685547, "logits_per_char": -0.6760473251342773, "bits_per_byte": 0.9753301233781713, "num_chars": 2}, {"sum_logits": -1.2752933502197266, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.2752933502197266, "logits_per_char": -0.6376466751098633, "bits_per_byte": 0.9199296960209967, "num_chars": 2}, {"sum_logits": -1.4635944366455078, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4635944366455078, "logits_per_char": -0.7317972183227539, "bits_per_byte": 1.0557602178113337, "num_chars": 2}, {"sum_logits": -1.5886173248291016, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.5886173248291016, "logits_per_char": -0.7943086624145508, "bits_per_byte": 1.1459451682014414, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 926, "native_id": "VASoL_2007_3_1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.491655945777893, "logits_per_token_corr": -1.491655945777893, "logits_per_char_corr": -0.7458279728889465, "bits_per_byte_corr": 1.0760023178439244}, "model_output": [{"sum_logits": -1.491655945777893, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.491655945777893, "logits_per_char": -0.7458279728889465, "bits_per_byte": 1.0760023178439244, "num_chars": 2}, {"sum_logits": -1.3313223123550415, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3313223123550415, "logits_per_char": -0.6656611561775208, "bits_per_byte": 0.9603460489304129, "num_chars": 2}, {"sum_logits": -1.2418428659439087, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.2418428659439087, "logits_per_char": -0.6209214329719543, "bits_per_byte": 0.895800272130951, "num_chars": 2}, {"sum_logits": -1.5642534494400024, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.5642534494400024, "logits_per_char": -0.7821267247200012, "bits_per_byte": 1.1283703471010837, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 927, "native_id": "Mercury_7030468", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3871444463729858, "logits_per_token_corr": -1.3871444463729858, "logits_per_char_corr": -0.6935722231864929, "bits_per_byte_corr": 1.0006132068902056}, "model_output": [{"sum_logits": -1.3871444463729858, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.3871444463729858, "logits_per_char": -0.6935722231864929, "bits_per_byte": 1.0006132068902056, "num_chars": 2}, {"sum_logits": -1.0343841314315796, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": true, "logits_per_token": -1.0343841314315796, "logits_per_char": -0.5171920657157898, "bits_per_byte": 0.7461504283958249, "num_chars": 2}, {"sum_logits": -1.3516048192977905, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.3516048192977905, "logits_per_char": -0.6758024096488953, "bits_per_byte": 0.9749767850219734, "num_chars": 2}, {"sum_logits": -2.1580300331115723, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -2.1580300331115723, "logits_per_char": -1.0790150165557861, "bits_per_byte": 1.5566896134308739, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 928, "native_id": "Mercury_SC_402616", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6844440698623657, "logits_per_token_corr": -1.6844440698623657, "logits_per_char_corr": -0.8422220349311829, "bits_per_byte_corr": 1.2150695531235018}, "model_output": [{"sum_logits": -1.4235435724258423, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4235435724258423, "logits_per_char": -0.7117717862129211, "bits_per_byte": 1.0268696262147985, "num_chars": 2}, {"sum_logits": -1.1660553216934204, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.1660553216934204, "logits_per_char": -0.5830276608467102, "bits_per_byte": 0.8411311150052456, "num_chars": 2}, {"sum_logits": -1.388146996498108, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.388146996498108, "logits_per_char": -0.694073498249054, "bits_per_byte": 1.001336393937084, "num_chars": 2}, {"sum_logits": -1.6844440698623657, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.6844440698623657, "logits_per_char": -0.8422220349311829, "bits_per_byte": 1.2150695531235018, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 929, "native_id": "Mercury_405464", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2304799556732178, "logits_per_token_corr": -1.2304799556732178, "logits_per_char_corr": -0.6152399778366089, "bits_per_byte_corr": 0.887603664982149}, "model_output": [{"sum_logits": -1.2304799556732178, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.2304799556732178, "logits_per_char": -0.6152399778366089, "bits_per_byte": 0.887603664982149, "num_chars": 2}, {"sum_logits": -1.0193288326263428, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": true, "logits_per_token": -1.0193288326263428, "logits_per_char": -0.5096644163131714, "bits_per_byte": 0.7352903259331087, "num_chars": 2}, {"sum_logits": -1.370044469833374, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.370044469833374, "logits_per_char": -0.685022234916687, "bits_per_byte": 0.9882781812136889, "num_chars": 2}, {"sum_logits": -2.459909200668335, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -2.459909200668335, "logits_per_char": -1.2299546003341675, "bits_per_byte": 1.7744494024219453, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 930, "native_id": "Mercury_7205608", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3341683149337769, "logits_per_token_corr": -1.3341683149337769, "logits_per_char_corr": -0.6670841574668884, "bits_per_byte_corr": 0.9623990058337637}, "model_output": [{"sum_logits": -1.630989909172058, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.630989909172058, "logits_per_char": -0.815494954586029, "bits_per_byte": 1.1765105268520797, "num_chars": 2}, {"sum_logits": -1.3341683149337769, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3341683149337769, "logits_per_char": -0.6670841574668884, "bits_per_byte": 0.9623990058337637, "num_chars": 2}, {"sum_logits": -1.1778534650802612, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.1778534650802612, "logits_per_char": -0.5889267325401306, "bits_per_byte": 0.8496416764831978, "num_chars": 2}, {"sum_logits": -1.5285156965255737, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.5285156965255737, "logits_per_char": -0.7642578482627869, "bits_per_byte": 1.1025910076499845, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 931, "native_id": "Mercury_7015208", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2384111881256104, "logits_per_token_corr": -1.2384111881256104, "logits_per_char_corr": -0.6192055940628052, "bits_per_byte_corr": 0.8933248398457554}, "model_output": [{"sum_logits": -1.468395471572876, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.468395471572876, "logits_per_char": -0.734197735786438, "bits_per_byte": 1.0592234324517604, "num_chars": 2}, {"sum_logits": -1.2384111881256104, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2384111881256104, "logits_per_char": -0.6192055940628052, "bits_per_byte": 0.8933248398457554, "num_chars": 2}, {"sum_logits": -1.2597410678863525, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.2597410678863525, "logits_per_char": -0.6298705339431763, "bits_per_byte": 0.9087110957225567, "num_chars": 2}, {"sum_logits": -1.6986773014068604, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.6986773014068604, "logits_per_char": -0.8493386507034302, "bits_per_byte": 1.2253366594060426, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 932, "native_id": "Mercury_SC_409666", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9686360359191895, "logits_per_token_corr": -1.9686360359191895, "logits_per_char_corr": -0.9843180179595947, "bits_per_byte_corr": 1.420070723168981}, "model_output": [{"sum_logits": -1.254641056060791, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.254641056060791, "logits_per_char": -0.6273205280303955, "bits_per_byte": 0.9050322148379478, "num_chars": 2}, {"sum_logits": -1.0648541450500488, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": true, "logits_per_token": -1.0648541450500488, "logits_per_char": -0.5324270725250244, "bits_per_byte": 0.7681298971674331, "num_chars": 2}, {"sum_logits": -1.5122113227844238, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.5122113227844238, "logits_per_char": -0.7561056613922119, "bits_per_byte": 1.0908298880793976, "num_chars": 2}, {"sum_logits": -1.9686360359191895, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.9686360359191895, "logits_per_char": -0.9843180179595947, "bits_per_byte": 1.420070723168981, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 933, "native_id": "Mercury_7230353", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3493976593017578, "logits_per_token_corr": -1.3493976593017578, "logits_per_char_corr": -0.6746988296508789, "bits_per_byte_corr": 0.9733846556316098}, "model_output": [{"sum_logits": -1.5095787048339844, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.5095787048339844, "logits_per_char": -0.7547893524169922, "bits_per_byte": 1.088930855648569, "num_chars": 2}, {"sum_logits": -1.3493976593017578, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.3493976593017578, "logits_per_char": -0.6746988296508789, "bits_per_byte": 0.9733846556316098, "num_chars": 2}, {"sum_logits": -1.3521156311035156, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3521156311035156, "logits_per_char": -0.6760578155517578, "bits_per_byte": 0.9753452578514472, "num_chars": 2}, {"sum_logits": -1.4146652221679688, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4146652221679688, "logits_per_char": -0.7073326110839844, "bits_per_byte": 1.0204652502706393, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 934, "native_id": "Mercury_7150343", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1097447872161865, "logits_per_token_corr": -1.1097447872161865, "logits_per_char_corr": -0.5548723936080933, "bits_per_byte_corr": 0.8005116505851603}, "model_output": [{"sum_logits": -1.3056199550628662, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -1.3056199550628662, "logits_per_char": -0.6528099775314331, "bits_per_byte": 0.9418057172281108, "num_chars": 2}, {"sum_logits": -1.1097447872161865, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": true, "logits_per_token": -1.1097447872161865, "logits_per_char": -0.5548723936080933, "bits_per_byte": 0.8005116505851603, "num_chars": 2}, {"sum_logits": -1.462066411972046, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -1.462066411972046, "logits_per_char": -0.731033205986023, "bits_per_byte": 1.0546579810019532, "num_chars": 2}, {"sum_logits": -1.870746374130249, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -1.870746374130249, "logits_per_char": -0.9353731870651245, "bits_per_byte": 1.3494582583603294, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 935, "native_id": "Mercury_7026723", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.0659079551696777, "logits_per_token_corr": -2.0659079551696777, "logits_per_char_corr": -1.0329539775848389, "bits_per_byte_corr": 1.4902375809292472}, "model_output": [{"sum_logits": -1.201245903968811, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.201245903968811, "logits_per_char": -0.6006229519844055, "bits_per_byte": 0.8665157542726144, "num_chars": 2}, {"sum_logits": -1.063439965248108, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.063439965248108, "logits_per_char": -0.531719982624054, "bits_per_byte": 0.7671097820738395, "num_chars": 2}, {"sum_logits": -1.5569645166397095, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.5569645166397095, "logits_per_char": -0.7784822583198547, "bits_per_byte": 1.1231124934989023, "num_chars": 2}, {"sum_logits": -2.0659079551696777, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -2.0659079551696777, "logits_per_char": -1.0329539775848389, "bits_per_byte": 1.4902375809292472, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 936, "native_id": "Mercury_7024273", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3595209121704102, "logits_per_token_corr": -1.3595209121704102, "logits_per_char_corr": -0.6797604560852051, "bits_per_byte_corr": 0.98068703898725}, "model_output": [{"sum_logits": -1.3211793899536133, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3211793899536133, "logits_per_char": -0.6605896949768066, "bits_per_byte": 0.9530294770060767, "num_chars": 2}, {"sum_logits": -1.1448240280151367, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.1448240280151367, "logits_per_char": -0.5724120140075684, "bits_per_byte": 0.8258159739545761, "num_chars": 2}, {"sum_logits": -1.3595209121704102, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3595209121704102, "logits_per_char": -0.6797604560852051, "bits_per_byte": 0.98068703898725, "num_chars": 2}, {"sum_logits": -1.9190645217895508, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.9190645217895508, "logits_per_char": -0.9595322608947754, "bits_per_byte": 1.384312434366862, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 937, "native_id": "AKDE&ED_2008_8_40", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1943532228469849, "logits_per_token_corr": -1.1943532228469849, "logits_per_char_corr": -0.5971766114234924, "bits_per_byte_corr": 0.861543735836167}, "model_output": [{"sum_logits": -1.3298417329788208, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.3298417329788208, "logits_per_char": -0.6649208664894104, "bits_per_byte": 0.959278036668554, "num_chars": 2}, {"sum_logits": -1.3622475862503052, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.3622475862503052, "logits_per_char": -0.6811237931251526, "bits_per_byte": 0.9826539185738439, "num_chars": 2}, {"sum_logits": -1.1943532228469849, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.1943532228469849, "logits_per_char": -0.5971766114234924, "bits_per_byte": 0.861543735836167, "num_chars": 2}, {"sum_logits": -1.9242841005325317, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.9242841005325317, "logits_per_char": -0.9621420502662659, "bits_per_byte": 1.3880775645508787, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 938, "native_id": "Mercury_183033", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2052407264709473, "logits_per_token_corr": -1.2052407264709473, "logits_per_char_corr": -0.6026203632354736, "bits_per_byte_corr": 0.8693974095791482}, "model_output": [{"sum_logits": -1.2031102180480957, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2031102180480957, "logits_per_char": -0.6015551090240479, "bits_per_byte": 0.8678605726110371, "num_chars": 2}, {"sum_logits": -1.2052407264709473, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.2052407264709473, "logits_per_char": -0.6026203632354736, "bits_per_byte": 0.8693974095791482, "num_chars": 2}, {"sum_logits": -1.291445255279541, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.291445255279541, "logits_per_char": -0.6457226276397705, "bits_per_byte": 0.9315808326863569, "num_chars": 2}, {"sum_logits": -2.2096619606018066, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -2.2096619606018066, "logits_per_char": -1.1048309803009033, "bits_per_byte": 1.5939341763017503, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 939, "native_id": "Mercury_402364", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4239888191223145, "logits_per_token_corr": -1.4239888191223145, "logits_per_char_corr": -0.7119944095611572, "bits_per_byte_corr": 1.027190803815285}, "model_output": [{"sum_logits": -1.71445894241333, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.71445894241333, "logits_per_char": -0.857229471206665, "bits_per_byte": 1.2367207070146127, "num_chars": 2}, {"sum_logits": -1.1478428840637207, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.1478428840637207, "logits_per_char": -0.5739214420318604, "bits_per_byte": 0.8279936182798024, "num_chars": 2}, {"sum_logits": -1.5496582984924316, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.5496582984924316, "logits_per_char": -0.7748291492462158, "bits_per_byte": 1.1178421711545332, "num_chars": 2}, {"sum_logits": -1.4239888191223145, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4239888191223145, "logits_per_char": -0.7119944095611572, "bits_per_byte": 1.027190803815285, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 940, "native_id": "Mercury_7263183", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.299984335899353, "logits_per_token_corr": -1.299984335899353, "logits_per_char_corr": -0.6499921679496765, "bits_per_byte_corr": 0.9377404773183382}, "model_output": [{"sum_logits": -1.5224822759628296, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.5224822759628296, "logits_per_char": -0.7612411379814148, "bits_per_byte": 1.0982388146872473, "num_chars": 2}, {"sum_logits": -1.299984335899353, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.299984335899353, "logits_per_char": -0.6499921679496765, "bits_per_byte": 0.9377404773183382, "num_chars": 2}, {"sum_logits": -1.2602466344833374, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.2602466344833374, "logits_per_char": -0.6301233172416687, "bits_per_byte": 0.9090757849337116, "num_chars": 2}, {"sum_logits": -1.5556148290634155, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.5556148290634155, "logits_per_char": -0.7778074145317078, "bits_per_byte": 1.1221388997123671, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 941, "native_id": "Mercury_7222530", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.14457368850708, "logits_per_token_corr": -1.14457368850708, "logits_per_char_corr": -0.57228684425354, "bits_per_byte_corr": 0.82563539217117}, "model_output": [{"sum_logits": -1.2179207801818848, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.2179207801818848, "logits_per_char": -0.6089603900909424, "bits_per_byte": 0.8785441348826424, "num_chars": 2}, {"sum_logits": -1.14457368850708, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": true, "logits_per_token": -1.14457368850708, "logits_per_char": -0.57228684425354, "bits_per_byte": 0.82563539217117, "num_chars": 2}, {"sum_logits": -1.480797290802002, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.480797290802002, "logits_per_char": -0.740398645401001, "bits_per_byte": 1.0681694540016977, "num_chars": 2}, {"sum_logits": -1.9134373664855957, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.9134373664855957, "logits_per_char": -0.9567186832427979, "bits_per_byte": 1.3802532998411952, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 942, "native_id": "OHAT_2009_8_36", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4811999797821045, "logits_per_token_corr": -1.4811999797821045, "logits_per_char_corr": -0.7405999898910522, "bits_per_byte_corr": 1.0684599326990052}, "model_output": [{"sum_logits": -1.1642405986785889, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.1642405986785889, "logits_per_char": -0.5821202993392944, "bits_per_byte": 0.8398220690582023, "num_chars": 2}, {"sum_logits": -1.1203277111053467, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.1203277111053467, "logits_per_char": -0.5601638555526733, "bits_per_byte": 0.808145616491664, "num_chars": 2}, {"sum_logits": -1.4811999797821045, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4811999797821045, "logits_per_char": -0.7405999898910522, "bits_per_byte": 1.0684599326990052, "num_chars": 2}, {"sum_logits": -2.0853030681610107, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -2.0853030681610107, "logits_per_char": -1.0426515340805054, "bits_per_byte": 1.5042281975942957, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 943, "native_id": "Mercury_7141750", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1710773706436157, "logits_per_token_corr": -1.1710773706436157, "logits_per_char_corr": -0.5855386853218079, "bits_per_byte_corr": 0.8447537575630224}, "model_output": [{"sum_logits": -1.3028656244277954, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3028656244277954, "logits_per_char": -0.6514328122138977, "bits_per_byte": 0.9398188876540168, "num_chars": 2}, {"sum_logits": -1.1710773706436157, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.1710773706436157, "logits_per_char": -0.5855386853218079, "bits_per_byte": 0.8447537575630224, "num_chars": 2}, {"sum_logits": -1.3232437372207642, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3232437372207642, "logits_per_char": -0.6616218686103821, "bits_per_byte": 0.9545185887885733, "num_chars": 2}, {"sum_logits": -1.9527431726455688, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.9527431726455688, "logits_per_char": -0.9763715863227844, "bits_per_byte": 1.4086064456537835, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 944, "native_id": "TIMSS_2011_4_pg45", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4288185834884644, "logits_per_token_corr": -1.4288185834884644, "logits_per_char_corr": -0.7144092917442322, "bits_per_byte_corr": 1.030674742365141}, "model_output": [{"sum_logits": -1.4288185834884644, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4288185834884644, "logits_per_char": -0.7144092917442322, "bits_per_byte": 1.030674742365141, "num_chars": 2}, {"sum_logits": -1.2070664167404175, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2070664167404175, "logits_per_char": -0.6035332083702087, "bits_per_byte": 0.8707143667281311, "num_chars": 2}, {"sum_logits": -1.3023556470870972, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3023556470870972, "logits_per_char": -0.6511778235435486, "bits_per_byte": 0.9394510167638209, "num_chars": 2}, {"sum_logits": -1.7252053022384644, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.7252053022384644, "logits_per_char": -0.8626026511192322, "bits_per_byte": 1.244472567028283, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 945, "native_id": "MCAS_2014_5_5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4280279874801636, "logits_per_token_corr": -1.4280279874801636, "logits_per_char_corr": -0.7140139937400818, "bits_per_byte_corr": 1.0301044478948793}, "model_output": [{"sum_logits": -1.4280279874801636, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4280279874801636, "logits_per_char": -0.7140139937400818, "bits_per_byte": 1.0301044478948793, "num_chars": 2}, {"sum_logits": -1.116418719291687, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.116418719291687, "logits_per_char": -0.5582093596458435, "bits_per_byte": 0.8053258749394409, "num_chars": 2}, {"sum_logits": -1.3149412870407104, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.3149412870407104, "logits_per_char": -0.6574706435203552, "bits_per_byte": 0.9485296369375734, "num_chars": 2}, {"sum_logits": -1.8989657163619995, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.8989657163619995, "logits_per_char": -0.9494828581809998, "bits_per_byte": 1.3698142109077913, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 946, "native_id": "Mercury_SC_409241", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1916885375976562, "logits_per_token_corr": -1.1916885375976562, "logits_per_char_corr": -0.5958442687988281, "bits_per_byte_corr": 0.8596215717387974}, "model_output": [{"sum_logits": -1.5320701599121094, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.5320701599121094, "logits_per_char": -0.7660350799560547, "bits_per_byte": 1.1051550110003747, "num_chars": 2}, {"sum_logits": -1.122110366821289, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.122110366821289, "logits_per_char": -0.5610551834106445, "bits_per_byte": 0.8094315307721662, "num_chars": 2}, {"sum_logits": -1.1916885375976562, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.1916885375976562, "logits_per_char": -0.5958442687988281, "bits_per_byte": 0.8596215717387974, "num_chars": 2}, {"sum_logits": -1.9557151794433594, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.9557151794433594, "logits_per_char": -0.9778575897216797, "bits_per_byte": 1.4107502953881155, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 947, "native_id": "Mercury_SC_401147", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4847661256790161, "logits_per_token_corr": -1.4847661256790161, "logits_per_char_corr": -0.7423830628395081, "bits_per_byte_corr": 1.0710323631992875}, "model_output": [{"sum_logits": -1.182296872138977, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.182296872138977, "logits_per_char": -0.5911484360694885, "bits_per_byte": 0.8528469171473302, "num_chars": 2}, {"sum_logits": -1.1934434175491333, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.1934434175491333, "logits_per_char": -0.5967217087745667, "bits_per_byte": 0.8608874500404741, "num_chars": 2}, {"sum_logits": -1.4847661256790161, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4847661256790161, "logits_per_char": -0.7423830628395081, "bits_per_byte": 1.0710323631992875, "num_chars": 2}, {"sum_logits": -1.9085217714309692, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.9085217714309692, "logits_per_char": -0.9542608857154846, "bits_per_byte": 1.3767074475370287, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 948, "native_id": "Mercury_SC_LBS10273", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8860455751419067, "logits_per_token_corr": -1.8860455751419067, "logits_per_char_corr": -0.9430227875709534, "bits_per_byte_corr": 1.3604942990748783}, "model_output": [{"sum_logits": -1.3202649354934692, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.3202649354934692, "logits_per_char": -0.6601324677467346, "bits_per_byte": 0.9523698375486919, "num_chars": 2}, {"sum_logits": -1.112045407295227, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": true, "logits_per_token": -1.112045407295227, "logits_per_char": -0.5560227036476135, "bits_per_byte": 0.8021711971746621, "num_chars": 2}, {"sum_logits": -1.445858120918274, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.445858120918274, "logits_per_char": -0.722929060459137, "bits_per_byte": 1.0429661704396638, "num_chars": 2}, {"sum_logits": -1.8860455751419067, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.8860455751419067, "logits_per_char": -0.9430227875709534, "bits_per_byte": 1.3604942990748783, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 949, "native_id": "Mercury_401523", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3452773094177246, "logits_per_token_corr": -1.3452773094177246, "logits_per_char_corr": -0.6726386547088623, "bits_per_byte_corr": 0.9704124514593966}, "model_output": [{"sum_logits": -1.3452773094177246, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.3452773094177246, "logits_per_char": -0.6726386547088623, "bits_per_byte": 0.9704124514593966, "num_chars": 2}, {"sum_logits": -1.0310759544372559, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": true, "logits_per_token": -1.0310759544372559, "logits_per_char": -0.5155379772186279, "bits_per_byte": 0.7437640831237763, "num_chars": 2}, {"sum_logits": -1.3989272117614746, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.3989272117614746, "logits_per_char": -0.6994636058807373, "bits_per_byte": 1.009112675487177, "num_chars": 2}, {"sum_logits": -2.084693431854248, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -2.084693431854248, "logits_per_char": -1.042346715927124, "bits_per_byte": 1.5037884379560393, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 950, "native_id": "Mercury_401865", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.870812177658081, "logits_per_token_corr": -1.870812177658081, "logits_per_char_corr": -0.9354060888290405, "bits_per_byte_corr": 1.3495057255719676}, "model_output": [{"sum_logits": -1.5369594097137451, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.5369594097137451, "logits_per_char": -0.7684797048568726, "bits_per_byte": 1.1086818592216208, "num_chars": 2}, {"sum_logits": -1.0893313884735107, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": true, "logits_per_token": -1.0893313884735107, "logits_per_char": -0.5446656942367554, "bits_per_byte": 0.785786496018276, "num_chars": 2}, {"sum_logits": -1.2987449169158936, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.2987449169158936, "logits_per_char": -0.6493724584579468, "bits_per_byte": 0.9368464255078273, "num_chars": 2}, {"sum_logits": -1.870812177658081, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.870812177658081, "logits_per_char": -0.9354060888290405, "bits_per_byte": 1.3495057255719676, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 951, "native_id": "MCAS_2013_8_29435", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3058092594146729, "logits_per_token_corr": -1.3058092594146729, "logits_per_char_corr": -0.6529046297073364, "bits_per_byte_corr": 0.941942271452896}, "model_output": [{"sum_logits": -1.2504122257232666, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.2504122257232666, "logits_per_char": -0.6252061128616333, "bits_per_byte": 0.901981758559592, "num_chars": 2}, {"sum_logits": -1.3058092594146729, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3058092594146729, "logits_per_char": -0.6529046297073364, "bits_per_byte": 0.941942271452896, "num_chars": 2}, {"sum_logits": -1.376434564590454, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.376434564590454, "logits_per_char": -0.688217282295227, "bits_per_byte": 0.9928876602221172, "num_chars": 2}, {"sum_logits": -1.7538282871246338, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.7538282871246338, "logits_per_char": -0.8769141435623169, "bits_per_byte": 1.265119686203656, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 952, "native_id": "Mercury_SC_406720", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3020713329315186, "logits_per_token_corr": -1.3020713329315186, "logits_per_char_corr": -0.6510356664657593, "bits_per_byte_corr": 0.9392459274526669}, "model_output": [{"sum_logits": -1.3428542613983154, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3428542613983154, "logits_per_char": -0.6714271306991577, "bits_per_byte": 0.9686645917786766, "num_chars": 2}, {"sum_logits": -1.181511640548706, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.181511640548706, "logits_per_char": -0.590755820274353, "bits_per_byte": 0.8522804922867132, "num_chars": 2}, {"sum_logits": -1.3020713329315186, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3020713329315186, "logits_per_char": -0.6510356664657593, "bits_per_byte": 0.9392459274526669, "num_chars": 2}, {"sum_logits": -1.927311658859253, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.927311658859253, "logits_per_char": -0.9636558294296265, "bits_per_byte": 1.3902614862428617, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 953, "native_id": "NYSEDREGENTS_2013_8_34", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4677366018295288, "logits_per_token_corr": -1.4677366018295288, "logits_per_char_corr": -0.7338683009147644, "bits_per_byte_corr": 1.0587481583961007}, "model_output": [{"sum_logits": -1.4677366018295288, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4677366018295288, "logits_per_char": -0.7338683009147644, "bits_per_byte": 1.0587481583961007, "num_chars": 2}, {"sum_logits": -1.1501573324203491, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.1501573324203491, "logits_per_char": -0.5750786662101746, "bits_per_byte": 0.8296631398630544, "num_chars": 2}, {"sum_logits": -1.3351167440414429, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.3351167440414429, "logits_per_char": -0.6675583720207214, "bits_per_byte": 0.9630831528188964, "num_chars": 2}, {"sum_logits": -1.7342084646224976, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.7342084646224976, "logits_per_char": -0.8671042323112488, "bits_per_byte": 1.250966975890169, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 954, "native_id": "Mercury_7038833", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.343319058418274, "logits_per_token_corr": -1.343319058418274, "logits_per_char_corr": -0.671659529209137, "bits_per_byte_corr": 0.9689998719565339}, "model_output": [{"sum_logits": -1.4435025453567505, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4435025453567505, "logits_per_char": -0.7217512726783752, "bits_per_byte": 1.041266981849138, "num_chars": 2}, {"sum_logits": -1.2718087434768677, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.2718087434768677, "logits_per_char": -0.6359043717384338, "bits_per_byte": 0.9174160835873095, "num_chars": 2}, {"sum_logits": -1.343319058418274, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.343319058418274, "logits_per_char": -0.671659529209137, "bits_per_byte": 0.9689998719565339, "num_chars": 2}, {"sum_logits": -1.5998135805130005, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.5998135805130005, "logits_per_char": -0.7999067902565002, "bits_per_byte": 1.1540215594772902, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 955, "native_id": "Mercury_175560", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3854483366012573, "logits_per_token_corr": -1.3854483366012573, "logits_per_char_corr": -0.6927241683006287, "bits_per_byte_corr": 0.9993897223119667}, "model_output": [{"sum_logits": -1.1748656034469604, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": true, "logits_per_token": -1.1748656034469604, "logits_per_char": -0.5874328017234802, "bits_per_byte": 0.8474863899025835, "num_chars": 2}, {"sum_logits": -1.3356016874313354, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.3356016874313354, "logits_per_char": -0.6678008437156677, "bits_per_byte": 0.9634329655307516, "num_chars": 2}, {"sum_logits": -1.3854483366012573, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.3854483366012573, "logits_per_char": -0.6927241683006287, "bits_per_byte": 0.9993897223119667, "num_chars": 2}, {"sum_logits": -1.834354281425476, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.834354281425476, "logits_per_char": -0.917177140712738, "bits_per_byte": 1.3232069125239367, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 956, "native_id": "Mercury_7005005", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.46977961063385, "logits_per_token_corr": -1.46977961063385, "logits_per_char_corr": -0.734889805316925, "bits_per_byte_corr": 1.0602218777313452}, "model_output": [{"sum_logits": -1.3972450494766235, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3972450494766235, "logits_per_char": -0.6986225247383118, "bits_per_byte": 1.0078992518940137, "num_chars": 2}, {"sum_logits": -1.0776993036270142, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.0776993036270142, "logits_per_char": -0.5388496518135071, "bits_per_byte": 0.7773957204566498, "num_chars": 2}, {"sum_logits": -1.46977961063385, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.46977961063385, "logits_per_char": -0.734889805316925, "bits_per_byte": 1.0602218777313452, "num_chars": 2}, {"sum_logits": -1.7630974054336548, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.7630974054336548, "logits_per_char": -0.8815487027168274, "bits_per_byte": 1.2718059417125795, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 957, "native_id": "Mercury_183890", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5693726539611816, "logits_per_token_corr": -1.5693726539611816, "logits_per_char_corr": -0.7846863269805908, "bits_per_byte_corr": 1.1320630725890872}, "model_output": [{"sum_logits": -1.5252346992492676, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.5252346992492676, "logits_per_char": -0.7626173496246338, "bits_per_byte": 1.1002242684001344, "num_chars": 2}, {"sum_logits": -1.2761731147766113, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.2761731147766113, "logits_per_char": -0.6380865573883057, "bits_per_byte": 0.9205643120026809, "num_chars": 2}, {"sum_logits": -1.2829079627990723, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.2829079627990723, "logits_per_char": -0.6414539813995361, "bits_per_byte": 0.925422477924257, "num_chars": 2}, {"sum_logits": -1.5693726539611816, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.5693726539611816, "logits_per_char": -0.7846863269805908, "bits_per_byte": 1.1320630725890872, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 958, "native_id": "Mercury_7270358", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5579288005828857, "logits_per_token_corr": -1.5579288005828857, "logits_per_char_corr": -0.7789644002914429, "bits_per_byte_corr": 1.1238080773303174}, "model_output": [{"sum_logits": -1.3985202312469482, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.3985202312469482, "logits_per_char": -0.6992601156234741, "bits_per_byte": 1.0088191011021541, "num_chars": 2}, {"sum_logits": -1.3216731548309326, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.3216731548309326, "logits_per_char": -0.6608365774154663, "bits_per_byte": 0.9533856530760138, "num_chars": 2}, {"sum_logits": -1.3438594341278076, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.3438594341278076, "logits_per_char": -0.6719297170639038, "bits_per_byte": 0.9693896706347148, "num_chars": 2}, {"sum_logits": -1.5579288005828857, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.5579288005828857, "logits_per_char": -0.7789644002914429, "bits_per_byte": 1.1238080773303174, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 959, "native_id": "MCAS_2013_5_29411", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.9683579802513123, "logits_per_token_corr": -0.9683579802513123, "logits_per_char_corr": -0.48417899012565613, "bits_per_byte_corr": 0.6985226279574123}, "model_output": [{"sum_logits": -1.5734529495239258, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.5734529495239258, "logits_per_char": -0.7867264747619629, "bits_per_byte": 1.1350063836759554, "num_chars": 2}, {"sum_logits": -1.4859933853149414, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4859933853149414, "logits_per_char": -0.7429966926574707, "bits_per_byte": 1.0719176438946043, "num_chars": 2}, {"sum_logits": -0.9683579802513123, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -0.9683579802513123, "logits_per_char": -0.48417899012565613, "bits_per_byte": 0.6985226279574123, "num_chars": 2}, {"sum_logits": -1.7448549270629883, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.7448549270629883, "logits_per_char": -0.8724274635314941, "bits_per_byte": 1.2586467751731278, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 960, "native_id": "ACTAAP_2007_7_31", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1515213251113892, "logits_per_token_corr": -1.1515213251113892, "logits_per_char_corr": -0.5757606625556946, "bits_per_byte_corr": 0.8306470526086412}, "model_output": [{"sum_logits": -1.3739429712295532, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.3739429712295532, "logits_per_char": -0.6869714856147766, "bits_per_byte": 0.9910903555292742, "num_chars": 2}, {"sum_logits": -1.2635475397109985, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.2635475397109985, "logits_per_char": -0.6317737698554993, "bits_per_byte": 0.9114568847349089, "num_chars": 2}, {"sum_logits": -1.1515213251113892, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": true, "logits_per_token": -1.1515213251113892, "logits_per_char": -0.5757606625556946, "bits_per_byte": 0.8306470526086412, "num_chars": 2}, {"sum_logits": -1.978314995765686, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.978314995765686, "logits_per_char": -0.989157497882843, "bits_per_byte": 1.4270526168547382, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 961, "native_id": "Mercury_7082023", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4545735120773315, "logits_per_token_corr": -1.4545735120773315, "logits_per_char_corr": -0.7272867560386658, "bits_per_byte_corr": 1.0492529962419583}, "model_output": [{"sum_logits": -1.2683018445968628, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.2683018445968628, "logits_per_char": -0.6341509222984314, "bits_per_byte": 0.9148863907757666, "num_chars": 2}, {"sum_logits": -1.4545735120773315, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4545735120773315, "logits_per_char": -0.7272867560386658, "bits_per_byte": 1.0492529962419583, "num_chars": 2}, {"sum_logits": -1.260170817375183, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.260170817375183, "logits_per_char": -0.6300854086875916, "bits_per_byte": 0.9090210944507372, "num_chars": 2}, {"sum_logits": -1.6984471082687378, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.6984471082687378, "logits_per_char": -0.8492235541343689, "bits_per_byte": 1.2251706101566344, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 962, "native_id": "MCAS_2003_8_21", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1719011068344116, "logits_per_token_corr": -1.1719011068344116, "logits_per_char_corr": -0.5859505534172058, "bits_per_byte_corr": 0.8453479576217539}, "model_output": [{"sum_logits": -1.50851571559906, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.50851571559906, "logits_per_char": -0.75425785779953, "bits_per_byte": 1.0881640709996967, "num_chars": 2}, {"sum_logits": -1.1719011068344116, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.1719011068344116, "logits_per_char": -0.5859505534172058, "bits_per_byte": 0.8453479576217539, "num_chars": 2}, {"sum_logits": -1.448438048362732, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.448438048362732, "logits_per_char": -0.724219024181366, "bits_per_byte": 1.0448271947046517, "num_chars": 2}, {"sum_logits": -1.5350011587142944, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.5350011587142944, "logits_per_char": -0.7675005793571472, "bits_per_byte": 1.1072692797187582, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 963, "native_id": "NYSEDREGENTS_2015_8_9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.296401858329773, "logits_per_token_corr": -1.296401858329773, "logits_per_char_corr": -0.6482009291648865, "bits_per_byte_corr": 0.9351562660064718}, "model_output": [{"sum_logits": -1.2910231351852417, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2910231351852417, "logits_per_char": -0.6455115675926208, "bits_per_byte": 0.9312763374030041, "num_chars": 2}, {"sum_logits": -1.296401858329773, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.296401858329773, "logits_per_char": -0.6482009291648865, "bits_per_byte": 0.9351562660064718, "num_chars": 2}, {"sum_logits": -1.3772600889205933, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3772600889205933, "logits_per_char": -0.6886300444602966, "bits_per_byte": 0.9934831501507301, "num_chars": 2}, {"sum_logits": -1.7116316556930542, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.7116316556930542, "logits_per_char": -0.8558158278465271, "bits_per_byte": 1.2346812507493545, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 964, "native_id": "Mercury_7064750", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.777699589729309, "logits_per_token_corr": -1.777699589729309, "logits_per_char_corr": -0.8888497948646545, "bits_per_byte_corr": 1.2823391911473307}, "model_output": [{"sum_logits": -1.419453740119934, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.419453740119934, "logits_per_char": -0.709726870059967, "bits_per_byte": 1.0239194358218957, "num_chars": 2}, {"sum_logits": -1.0699092149734497, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": true, "logits_per_token": -1.0699092149734497, "logits_per_char": -0.5349546074867249, "bits_per_byte": 0.7717763593223543, "num_chars": 2}, {"sum_logits": -1.4602538347244263, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.4602538347244263, "logits_per_char": -0.7301269173622131, "bits_per_byte": 1.0533504828987676, "num_chars": 2}, {"sum_logits": -1.777699589729309, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.777699589729309, "logits_per_char": -0.8888497948646545, "bits_per_byte": 1.2823391911473307, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 965, "native_id": "TIMSS_2007_8_pg113", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4401729106903076, "logits_per_token_corr": -1.4401729106903076, "logits_per_char_corr": -0.7200864553451538, "bits_per_byte_corr": 1.0388651581385118}, "model_output": [{"sum_logits": -1.5783679485321045, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.5783679485321045, "logits_per_char": -0.7891839742660522, "bits_per_byte": 1.138551806023495, "num_chars": 2}, {"sum_logits": -1.1028735637664795, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.1028735637664795, "logits_per_char": -0.5514367818832397, "bits_per_byte": 0.7955551105872906, "num_chars": 2}, {"sum_logits": -1.4401729106903076, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4401729106903076, "logits_per_char": -0.7200864553451538, "bits_per_byte": 1.0388651581385118, "num_chars": 2}, {"sum_logits": -1.6004607677459717, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.6004607677459717, "logits_per_char": -0.8002303838729858, "bits_per_byte": 1.1544884063830576, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 966, "native_id": "Mercury_7173583", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4873378276824951, "logits_per_token_corr": -1.4873378276824951, "logits_per_char_corr": -0.7436689138412476, "bits_per_byte_corr": 1.0728874540628204}, "model_output": [{"sum_logits": -1.1344802379608154, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.1344802379608154, "logits_per_char": -0.5672401189804077, "bits_per_byte": 0.8183545066468877, "num_chars": 2}, {"sum_logits": -1.1051757335662842, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.1051757335662842, "logits_per_char": -0.5525878667831421, "bits_per_byte": 0.797215775064023, "num_chars": 2}, {"sum_logits": -1.4873378276824951, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4873378276824951, "logits_per_char": -0.7436689138412476, "bits_per_byte": 1.0728874540628204, "num_chars": 2}, {"sum_logits": -2.212141752243042, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -2.212141752243042, "logits_per_char": -1.106070876121521, "bits_per_byte": 1.5957229678533758, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 967, "native_id": "Mercury_403930", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.432693600654602, "logits_per_token_corr": -1.432693600654602, "logits_per_char_corr": -0.716346800327301, "bits_per_byte_corr": 1.0334699763896162}, "model_output": [{"sum_logits": -1.391719937324524, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.391719937324524, "logits_per_char": -0.695859968662262, "bits_per_byte": 1.003913725942916, "num_chars": 2}, {"sum_logits": -0.9959107637405396, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -0.9959107637405396, "logits_per_char": -0.4979553818702698, "bits_per_byte": 0.7183977600087244, "num_chars": 2}, {"sum_logits": -1.432693600654602, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.432693600654602, "logits_per_char": -0.716346800327301, "bits_per_byte": 1.0334699763896162, "num_chars": 2}, {"sum_logits": -2.031524658203125, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -2.031524658203125, "logits_per_char": -1.0157623291015625, "bits_per_byte": 1.4654352749177002, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 968, "native_id": "Mercury_417118", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3531899452209473, "logits_per_token_corr": -1.3531899452209473, "logits_per_char_corr": -0.6765949726104736, "bits_per_byte_corr": 0.9761202116762356}, "model_output": [{"sum_logits": -1.4900784492492676, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.4900784492492676, "logits_per_char": -0.7450392246246338, "bits_per_byte": 1.0748643946344898, "num_chars": 2}, {"sum_logits": -1.3691887855529785, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.3691887855529785, "logits_per_char": -0.6845943927764893, "bits_per_byte": 0.9876609354797418, "num_chars": 2}, {"sum_logits": -1.3531899452209473, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.3531899452209473, "logits_per_char": -0.6765949726104736, "bits_per_byte": 0.9761202116762356, "num_chars": 2}, {"sum_logits": -1.4508862495422363, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.4508862495422363, "logits_per_char": -0.7254431247711182, "bits_per_byte": 1.0465931985550376, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 969, "native_id": "Mercury_7143010", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.9890535473823547, "logits_per_token_corr": -0.9890535473823547, "logits_per_char_corr": -0.49452677369117737, "bits_per_byte_corr": 0.7134513239915928}, "model_output": [{"sum_logits": -1.393563985824585, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.393563985824585, "logits_per_char": -0.6967819929122925, "bits_per_byte": 1.0052439257560155, "num_chars": 2}, {"sum_logits": -0.9890535473823547, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": true, "logits_per_token": -0.9890535473823547, "logits_per_char": -0.49452677369117737, "bits_per_byte": 0.7134513239915928, "num_chars": 2}, {"sum_logits": -1.48502516746521, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.48502516746521, "logits_per_char": -0.742512583732605, "bits_per_byte": 1.07121922234945, "num_chars": 2}, {"sum_logits": -1.9458959102630615, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.9458959102630615, "logits_per_char": -0.9729479551315308, "bits_per_byte": 1.4036671899123256, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 970, "native_id": "Mercury_SC_401801", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.353629469871521, "logits_per_token_corr": -1.353629469871521, "logits_per_char_corr": -0.6768147349357605, "bits_per_byte_corr": 0.9764372616931014}, "model_output": [{"sum_logits": -1.353629469871521, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.353629469871521, "logits_per_char": -0.6768147349357605, "bits_per_byte": 0.9764372616931014, "num_chars": 2}, {"sum_logits": -1.1978944540023804, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.1978944540023804, "logits_per_char": -0.5989472270011902, "bits_per_byte": 0.8640981941494341, "num_chars": 2}, {"sum_logits": -1.2594941854476929, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.2594941854476929, "logits_per_char": -0.6297470927238464, "bits_per_byte": 0.9085330076875882, "num_chars": 2}, {"sum_logits": -1.9189733266830444, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.9189733266830444, "logits_per_char": -0.9594866633415222, "bits_per_byte": 1.384246651002907, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 971, "native_id": "Mercury_410334", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.113083839416504, "logits_per_token_corr": -1.113083839416504, "logits_per_char_corr": -0.556541919708252, "bits_per_byte_corr": 0.8029202676104956}, "model_output": [{"sum_logits": -1.4552946090698242, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.4552946090698242, "logits_per_char": -0.7276473045349121, "bits_per_byte": 1.0497731577694933, "num_chars": 2}, {"sum_logits": -1.113083839416504, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": true, "logits_per_token": -1.113083839416504, "logits_per_char": -0.556541919708252, "bits_per_byte": 0.8029202676104956, "num_chars": 2}, {"sum_logits": -1.394547462463379, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.394547462463379, "logits_per_char": -0.6972737312316895, "bits_per_byte": 1.005953354190825, "num_chars": 2}, {"sum_logits": -1.7392282485961914, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.7392282485961914, "logits_per_char": -0.8696141242980957, "bits_per_byte": 1.2545879846127626, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 972, "native_id": "NAEP_2000_4_S12+3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4335517883300781, "logits_per_token_corr": -1.4335517883300781, "logits_per_char_corr": -0.7167758941650391, "bits_per_byte_corr": 1.0340890279413972}, "model_output": [{"sum_logits": -1.7112674713134766, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.7112674713134766, "logits_per_char": -0.8556337356567383, "bits_per_byte": 1.2344185472501614, "num_chars": 2}, {"sum_logits": -1.4335517883300781, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4335517883300781, "logits_per_char": -0.7167758941650391, "bits_per_byte": 1.0340890279413972, "num_chars": 2}, {"sum_logits": -1.1836299896240234, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.1836299896240234, "logits_per_char": -0.5918149948120117, "bits_per_byte": 0.8538085581396303, "num_chars": 2}, {"sum_logits": -1.3680648803710938, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3680648803710938, "logits_per_char": -0.6840324401855469, "bits_per_byte": 0.9868502092635739, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 973, "native_id": "Mercury_7218015", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9693152904510498, "logits_per_token_corr": -1.9693152904510498, "logits_per_char_corr": -0.9846576452255249, "bits_per_byte_corr": 1.4205607017412896}, "model_output": [{"sum_logits": -1.339195966720581, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.339195966720581, "logits_per_char": -0.6695979833602905, "bits_per_byte": 0.9660256899838358, "num_chars": 2}, {"sum_logits": -1.0997092723846436, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.0997092723846436, "logits_per_char": -0.5498546361923218, "bits_per_byte": 0.7932725568450377, "num_chars": 2}, {"sum_logits": -1.3907077312469482, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3907077312469482, "logits_per_char": -0.6953538656234741, "bits_per_byte": 1.0031835735986774, "num_chars": 2}, {"sum_logits": -1.9693152904510498, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.9693152904510498, "logits_per_char": -0.9846576452255249, "bits_per_byte": 1.4205607017412896, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 974, "native_id": "Mercury_7109603", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3884291648864746, "logits_per_token_corr": -1.3884291648864746, "logits_per_char_corr": -0.6942145824432373, "bits_per_byte_corr": 1.0015399354043804}, "model_output": [{"sum_logits": -1.3884291648864746, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.3884291648864746, "logits_per_char": -0.6942145824432373, "bits_per_byte": 1.0015399354043804, "num_chars": 2}, {"sum_logits": -0.9976038932800293, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": true, "logits_per_token": -0.9976038932800293, "logits_per_char": -0.49880194664001465, "bits_per_byte": 0.7196190948038275, "num_chars": 2}, {"sum_logits": -1.4510626792907715, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.4510626792907715, "logits_per_char": -0.7255313396453857, "bits_per_byte": 1.0467204657166762, "num_chars": 2}, {"sum_logits": -1.9750361442565918, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.9750361442565918, "logits_per_char": -0.9875180721282959, "bits_per_byte": 1.4246874254487456, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 975, "native_id": "NYSEDREGENTS_2008_8_42", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0576655864715576, "logits_per_token_corr": -1.0576655864715576, "logits_per_char_corr": -0.5288327932357788, "bits_per_byte_corr": 0.7629444482612647}, "model_output": [{"sum_logits": -1.5623729228973389, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.5623729228973389, "logits_per_char": -0.7811864614486694, "bits_per_byte": 1.1270138339424025, "num_chars": 2}, {"sum_logits": -1.3628413677215576, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3628413677215576, "logits_per_char": -0.6814206838607788, "bits_per_byte": 0.983082241365818, "num_chars": 2}, {"sum_logits": -1.0576655864715576, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.0576655864715576, "logits_per_char": -0.5288327932357788, "bits_per_byte": 0.7629444482612647, "num_chars": 2}, {"sum_logits": -1.7833106517791748, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.7833106517791748, "logits_per_char": -0.8916553258895874, "bits_per_byte": 1.2863867168440646, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 976, "native_id": "NAEP_2000_8_S11+11", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2383348941802979, "logits_per_token_corr": -1.2383348941802979, "logits_per_char_corr": -0.6191674470901489, "bits_per_byte_corr": 0.8932698053974792}, "model_output": [{"sum_logits": -1.607097864151001, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.607097864151001, "logits_per_char": -0.8035489320755005, "bits_per_byte": 1.15927605941778, "num_chars": 2}, {"sum_logits": -1.2383348941802979, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.2383348941802979, "logits_per_char": -0.6191674470901489, "bits_per_byte": 0.8932698053974792, "num_chars": 2}, {"sum_logits": -1.1244699954986572, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.1244699954986572, "logits_per_char": -0.5622349977493286, "bits_per_byte": 0.8111336430677566, "num_chars": 2}, {"sum_logits": -1.7725985050201416, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.7725985050201416, "logits_per_char": -0.8862992525100708, "bits_per_byte": 1.2786595363407929, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 977, "native_id": "Mercury_7271670", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.07277774810791, "logits_per_token_corr": -2.07277774810791, "logits_per_char_corr": -1.036388874053955, "bits_per_byte_corr": 1.4951930890312117}, "model_output": [{"sum_logits": -1.3472280502319336, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3472280502319336, "logits_per_char": -0.6736140251159668, "bits_per_byte": 0.9718196135087571, "num_chars": 2}, {"sum_logits": -1.0915441513061523, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.0915441513061523, "logits_per_char": -0.5457720756530762, "bits_per_byte": 0.7873826670009348, "num_chars": 2}, {"sum_logits": -1.3117074966430664, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3117074966430664, "logits_per_char": -0.6558537483215332, "bits_per_byte": 0.9461969502525941, "num_chars": 2}, {"sum_logits": -2.07277774810791, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -2.07277774810791, "logits_per_char": -1.036388874053955, "bits_per_byte": 1.4951930890312117, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 978, "native_id": "ACTAAP_2009_5_8", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.465383529663086, "logits_per_token_corr": -1.465383529663086, "logits_per_char_corr": -0.732691764831543, "bits_per_byte_corr": 1.057050775623409}, "model_output": [{"sum_logits": -1.4774398803710938, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4774398803710938, "logits_per_char": -0.7387199401855469, "bits_per_byte": 1.0657475943122459, "num_chars": 2}, {"sum_logits": -1.465383529663086, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.465383529663086, "logits_per_char": -0.732691764831543, "bits_per_byte": 1.057050775623409, "num_chars": 2}, {"sum_logits": -1.244802474975586, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.244802474975586, "logits_per_char": -0.622401237487793, "bits_per_byte": 0.897935178767438, "num_chars": 2}, {"sum_logits": -1.4375553131103516, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4375553131103516, "logits_per_char": -0.7187776565551758, "bits_per_byte": 1.0369769606146877, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 979, "native_id": "NYSEDREGENTS_2012_4_1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1027014255523682, "logits_per_token_corr": -1.1027014255523682, "logits_per_char_corr": -0.5513507127761841, "bits_per_byte_corr": 0.7954309391133675}, "model_output": [{"sum_logits": -1.368877649307251, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.368877649307251, "logits_per_char": -0.6844388246536255, "bits_per_byte": 0.9874364981203657, "num_chars": 2}, {"sum_logits": -1.1027014255523682, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.1027014255523682, "logits_per_char": -0.5513507127761841, "bits_per_byte": 0.7954309391133675, "num_chars": 2}, {"sum_logits": -1.3309309482574463, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3309309482574463, "logits_per_char": -0.6654654741287231, "bits_per_byte": 0.9600637394090215, "num_chars": 2}, {"sum_logits": -1.9942514896392822, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.9942514896392822, "logits_per_char": -0.9971257448196411, "bits_per_byte": 1.4385483671950436, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 980, "native_id": "Mercury_SC_409030", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.000539541244507, "logits_per_token_corr": -2.000539541244507, "logits_per_char_corr": -1.0002697706222534, "bits_per_byte_corr": 1.4430842376289028}, "model_output": [{"sum_logits": -1.129239797592163, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.129239797592163, "logits_per_char": -0.5646198987960815, "bits_per_byte": 0.8145743279809204, "num_chars": 2}, {"sum_logits": -1.2133080959320068, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.2133080959320068, "logits_per_char": -0.6066540479660034, "bits_per_byte": 0.8752167865363972, "num_chars": 2}, {"sum_logits": -1.471017599105835, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.471017599105835, "logits_per_char": -0.7355087995529175, "bits_per_byte": 1.061114897645951, "num_chars": 2}, {"sum_logits": -2.000539541244507, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -2.000539541244507, "logits_per_char": -1.0002697706222534, "bits_per_byte": 1.4430842376289028, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 981, "native_id": "MEA_2013_8_8", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9979664087295532, "logits_per_token_corr": -1.9979664087295532, "logits_per_char_corr": -0.9989832043647766, "bits_per_byte_corr": 1.4412281148694646}, "model_output": [{"sum_logits": -1.5949207544326782, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.5949207544326782, "logits_per_char": -0.7974603772163391, "bits_per_byte": 1.150492131516281, "num_chars": 2}, {"sum_logits": -1.5965019464492798, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.5965019464492798, "logits_per_char": -0.7982509732246399, "bits_per_byte": 1.151632720456804, "num_chars": 2}, {"sum_logits": -1.5531421899795532, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.5531421899795532, "logits_per_char": -0.7765710949897766, "bits_per_byte": 1.1203552676402677, "num_chars": 2}, {"sum_logits": -1.9979664087295532, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.9979664087295532, "logits_per_char": -0.9989832043647766, "bits_per_byte": 1.4412281148694646, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 982, "native_id": "Mercury_7140333", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.803802728652954, "logits_per_token_corr": -1.803802728652954, "logits_per_char_corr": -0.901901364326477, "bits_per_byte_corr": 1.3011686256857335}, "model_output": [{"sum_logits": -1.4504358768463135, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.4504358768463135, "logits_per_char": -0.7252179384231567, "bits_per_byte": 1.0462683233275576, "num_chars": 2}, {"sum_logits": -1.0442850589752197, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": true, "logits_per_token": -1.0442850589752197, "logits_per_char": -0.5221425294876099, "bits_per_byte": 0.7532924379295353, "num_chars": 2}, {"sum_logits": -1.4568579196929932, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.4568579196929932, "logits_per_char": -0.7284289598464966, "bits_per_byte": 1.0509008480112014, "num_chars": 2}, {"sum_logits": -1.803802728652954, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.803802728652954, "logits_per_char": -0.901901364326477, "bits_per_byte": 1.3011686256857335, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 983, "native_id": "Mercury_SC_LBS10664", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2908285856246948, "logits_per_token_corr": -1.2908285856246948, "logits_per_char_corr": -0.6454142928123474, "bits_per_byte_corr": 0.9311359995598999}, "model_output": [{"sum_logits": -1.5245531797409058, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.5245531797409058, "logits_per_char": -0.7622765898704529, "bits_per_byte": 1.0997326559926428, "num_chars": 2}, {"sum_logits": -1.2332266569137573, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.2332266569137573, "logits_per_char": -0.6166133284568787, "bits_per_byte": 0.8895849911114155, "num_chars": 2}, {"sum_logits": -1.2908285856246948, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.2908285856246948, "logits_per_char": -0.6454142928123474, "bits_per_byte": 0.9311359995598999, "num_chars": 2}, {"sum_logits": -1.608697772026062, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.608697772026062, "logits_per_char": -0.804348886013031, "bits_per_byte": 1.1604301489963957, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 984, "native_id": "Mercury_7171430", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6069536209106445, "logits_per_token_corr": -1.6069536209106445, "logits_per_char_corr": -0.8034768104553223, "bits_per_byte_corr": 1.159172009914008}, "model_output": [{"sum_logits": -1.6874494552612305, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.6874494552612305, "logits_per_char": -0.8437247276306152, "bits_per_byte": 1.2172374804289545, "num_chars": 2}, {"sum_logits": -1.6069536209106445, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.6069536209106445, "logits_per_char": -0.8034768104553223, "bits_per_byte": 1.159172009914008, "num_chars": 2}, {"sum_logits": -1.1003942489624023, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.1003942489624023, "logits_per_char": -0.5501971244812012, "bits_per_byte": 0.7937666630009669, "num_chars": 2}, {"sum_logits": -1.3799409866333008, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3799409866333008, "logits_per_char": -0.6899704933166504, "bits_per_byte": 0.9954170090683583, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 985, "native_id": "Mercury_SC_407572", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7332508563995361, "logits_per_token_corr": -1.7332508563995361, "logits_per_char_corr": -0.8666254281997681, "bits_per_byte_corr": 1.250276207572978}, "model_output": [{"sum_logits": -1.3564884662628174, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3564884662628174, "logits_per_char": -0.6782442331314087, "bits_per_byte": 0.9784995916509243, "num_chars": 2}, {"sum_logits": -1.2302124500274658, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.2302124500274658, "logits_per_char": -0.6151062250137329, "bits_per_byte": 0.8874107004478808, "num_chars": 2}, {"sum_logits": -1.338395357131958, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.338395357131958, "logits_per_char": -0.669197678565979, "bits_per_byte": 0.9654481722422381, "num_chars": 2}, {"sum_logits": -1.7332508563995361, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.7332508563995361, "logits_per_char": -0.8666254281997681, "bits_per_byte": 1.250276207572978, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 986, "native_id": "VASoL_2009_3_2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3827983140945435, "logits_per_token_corr": -1.3827983140945435, "logits_per_char_corr": -0.6913991570472717, "bits_per_byte_corr": 0.9974781351476252}, "model_output": [{"sum_logits": -1.8245230913162231, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.8245230913162231, "logits_per_char": -0.9122615456581116, "bits_per_byte": 1.3161152079156038, "num_chars": 2}, {"sum_logits": -1.2440444231033325, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.2440444231033325, "logits_per_char": -0.6220222115516663, "bits_per_byte": 0.8973883599290193, "num_chars": 2}, {"sum_logits": -1.2721854448318481, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.2721854448318481, "logits_per_char": -0.6360927224159241, "bits_per_byte": 0.917687816175673, "num_chars": 2}, {"sum_logits": -1.3827983140945435, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3827983140945435, "logits_per_char": -0.6913991570472717, "bits_per_byte": 0.9974781351476252, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 987, "native_id": "Mercury_SC_407383", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.3292176723480225, "logits_per_token_corr": -2.3292176723480225, "logits_per_char_corr": -1.1646088361740112, "bits_per_byte_corr": 1.6801753925249203}, "model_output": [{"sum_logits": -1.169095754623413, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.169095754623413, "logits_per_char": -0.5845478773117065, "bits_per_byte": 0.8433243237603751, "num_chars": 2}, {"sum_logits": -1.0744760036468506, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.0744760036468506, "logits_per_char": -0.5372380018234253, "bits_per_byte": 0.7750706010083084, "num_chars": 2}, {"sum_logits": -1.4237496852874756, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4237496852874756, "logits_per_char": -0.7118748426437378, "bits_per_byte": 1.0270183052164696, "num_chars": 2}, {"sum_logits": -2.3292176723480225, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -2.3292176723480225, "logits_per_char": -1.1646088361740112, "bits_per_byte": 1.6801753925249203, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 988, "native_id": "Mercury_7218400", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0595694780349731, "logits_per_token_corr": -1.0595694780349731, "logits_per_char_corr": -0.5297847390174866, "bits_per_byte_corr": 0.7643178157197307}, "model_output": [{"sum_logits": -1.5154963731765747, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.5154963731765747, "logits_per_char": -0.7577481865882874, "bits_per_byte": 1.0931995510343124, "num_chars": 2}, {"sum_logits": -1.0595694780349731, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": true, "logits_per_token": -1.0595694780349731, "logits_per_char": -0.5297847390174866, "bits_per_byte": 0.7643178157197307, "num_chars": 2}, {"sum_logits": -1.4100314378738403, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.4100314378738403, "logits_per_char": -0.7050157189369202, "bits_per_byte": 1.0171226814597927, "num_chars": 2}, {"sum_logits": -1.7356120347976685, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.7356120347976685, "logits_per_char": -0.8678060173988342, "bits_per_byte": 1.2519794377557991, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 989, "native_id": "Mercury_184818", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6007428169250488, "logits_per_token_corr": -1.6007428169250488, "logits_per_char_corr": -0.8003714084625244, "bits_per_byte_corr": 1.1546918618590285}, "model_output": [{"sum_logits": -1.4839882850646973, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4839882850646973, "logits_per_char": -0.7419941425323486, "bits_per_byte": 1.0704712698008472, "num_chars": 2}, {"sum_logits": -1.1959786415100098, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.1959786415100098, "logits_per_char": -0.5979893207550049, "bits_per_byte": 0.862716227558425, "num_chars": 2}, {"sum_logits": -1.358747959136963, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.358747959136963, "logits_per_char": -0.6793739795684814, "bits_per_byte": 0.9801294712331523, "num_chars": 2}, {"sum_logits": -1.6007428169250488, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.6007428169250488, "logits_per_char": -0.8003714084625244, "bits_per_byte": 1.1546918618590285, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 990, "native_id": "Mercury_SC_405931", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8612656593322754, "logits_per_token_corr": -1.8612656593322754, "logits_per_char_corr": -0.9306328296661377, "bits_per_byte_corr": 1.3426193682487648}, "model_output": [{"sum_logits": -1.4301609992980957, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4301609992980957, "logits_per_char": -0.7150804996490479, "bits_per_byte": 1.0316430906808247, "num_chars": 2}, {"sum_logits": -1.0832409858703613, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.0832409858703613, "logits_per_char": -0.5416204929351807, "bits_per_byte": 0.7813931992019824, "num_chars": 2}, {"sum_logits": -1.3594574928283691, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.3594574928283691, "logits_per_char": -0.6797287464141846, "bits_per_byte": 0.9806412916021203, "num_chars": 2}, {"sum_logits": -1.8612656593322754, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.8612656593322754, "logits_per_char": -0.9306328296661377, "bits_per_byte": 1.3426193682487648, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 991, "native_id": "Mercury_SC_416177", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4037930965423584, "logits_per_token_corr": -1.4037930965423584, "logits_per_char_corr": -0.7018965482711792, "bits_per_byte_corr": 1.0126226694086387}, "model_output": [{"sum_logits": -1.28989577293396, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.28989577293396, "logits_per_char": -0.64494788646698, "bits_per_byte": 0.9304631174383987, "num_chars": 2}, {"sum_logits": -1.2281453609466553, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.2281453609466553, "logits_per_char": -0.6140726804733276, "bits_per_byte": 0.8859196108648992, "num_chars": 2}, {"sum_logits": -1.4037930965423584, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4037930965423584, "logits_per_char": -0.7018965482711792, "bits_per_byte": 1.0126226694086387, "num_chars": 2}, {"sum_logits": -1.840808629989624, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.840808629989624, "logits_per_char": -0.920404314994812, "bits_per_byte": 1.3278627408567727, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 992, "native_id": "Mercury_SC_406625", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0558617115020752, "logits_per_token_corr": -1.0558617115020752, "logits_per_char_corr": -0.5279308557510376, "bits_per_byte_corr": 0.7616432275248358}, "model_output": [{"sum_logits": -1.1437714099884033, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.1437714099884033, "logits_per_char": -0.5718857049942017, "bits_per_byte": 0.8250566705510162, "num_chars": 2}, {"sum_logits": -1.0558617115020752, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.0558617115020752, "logits_per_char": -0.5279308557510376, "bits_per_byte": 0.7616432275248358, "num_chars": 2}, {"sum_logits": -1.4945709705352783, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4945709705352783, "logits_per_char": -0.7472854852676392, "bits_per_byte": 1.0781050637247, "num_chars": 2}, {"sum_logits": -2.3183090686798096, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -2.3183090686798096, "logits_per_char": -1.1591545343399048, "bits_per_byte": 1.6723064983173377, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 993, "native_id": "MCAS_2014_8_16", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9471161365509033, "logits_per_token_corr": -1.9471161365509033, "logits_per_char_corr": -0.9735580682754517, "bits_per_byte_corr": 1.404547397119442}, "model_output": [{"sum_logits": -1.5344326496124268, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.5344326496124268, "logits_per_char": -0.7672163248062134, "bits_per_byte": 1.1068591870877755, "num_chars": 2}, {"sum_logits": -1.0031540393829346, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": true, "logits_per_token": -1.0031540393829346, "logits_per_char": -0.5015770196914673, "bits_per_byte": 0.7236226789332657, "num_chars": 2}, {"sum_logits": -1.3467133045196533, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.3467133045196533, "logits_per_char": -0.6733566522598267, "bits_per_byte": 0.9714483029655441, "num_chars": 2}, {"sum_logits": -1.9471161365509033, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.9471161365509033, "logits_per_char": -0.9735580682754517, "bits_per_byte": 1.404547397119442, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 994, "native_id": "Mercury_7138460", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.535051941871643, "logits_per_token_corr": -1.535051941871643, "logits_per_char_corr": -0.7675259709358215, "bits_per_byte_corr": 1.107305912023392}, "model_output": [{"sum_logits": -1.2141672372817993, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.2141672372817993, "logits_per_char": -0.6070836186408997, "bits_per_byte": 0.8758365260187818, "num_chars": 2}, {"sum_logits": -1.2463270425796509, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.2463270425796509, "logits_per_char": -0.6231635212898254, "bits_per_byte": 0.8990349218283811, "num_chars": 2}, {"sum_logits": -1.535051941871643, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.535051941871643, "logits_per_char": -0.7675259709358215, "bits_per_byte": 1.107305912023392, "num_chars": 2}, {"sum_logits": -1.7226873636245728, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.7226873636245728, "logits_per_char": -0.8613436818122864, "bits_per_byte": 1.2426562582525196, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 995, "native_id": "Mercury_7129640", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.137316107749939, "logits_per_token_corr": -1.137316107749939, "logits_per_char_corr": -0.5686580538749695, "bits_per_byte_corr": 0.8204001542875768}, "model_output": [{"sum_logits": -1.1955913305282593, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.1955913305282593, "logits_per_char": -0.5977956652641296, "bits_per_byte": 0.8624368417420982, "num_chars": 2}, {"sum_logits": -1.137316107749939, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.137316107749939, "logits_per_char": -0.5686580538749695, "bits_per_byte": 0.8204001542875768, "num_chars": 2}, {"sum_logits": -1.5787433385849, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.5787433385849, "logits_per_char": -0.78937166929245, "bits_per_byte": 1.1388225927072786, "num_chars": 2}, {"sum_logits": -1.8766826391220093, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.8766826391220093, "logits_per_char": -0.9383413195610046, "bits_per_byte": 1.35374036839284, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 996, "native_id": "Mercury_7024290", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3637210130691528, "logits_per_token_corr": -1.3637210130691528, "logits_per_char_corr": -0.6818605065345764, "bits_per_byte_corr": 0.9837167713561767}, "model_output": [{"sum_logits": -1.3718234300613403, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3718234300613403, "logits_per_char": -0.6859117150306702, "bits_per_byte": 0.9895614297631027, "num_chars": 2}, {"sum_logits": -1.3637210130691528, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.3637210130691528, "logits_per_char": -0.6818605065345764, "bits_per_byte": 0.9837167713561767, "num_chars": 2}, {"sum_logits": -1.4684592485427856, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4684592485427856, "logits_per_char": -0.7342296242713928, "bits_per_byte": 1.0592694378108662, "num_chars": 2}, {"sum_logits": -1.5175944566726685, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.5175944566726685, "logits_per_char": -0.7587972283363342, "bits_per_byte": 1.0947129983619064, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 997, "native_id": "NYSEDREGENTS_2008_4_28", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5244346857070923, "logits_per_token_corr": -1.5244346857070923, "logits_per_char_corr": -0.7622173428535461, "bits_per_byte_corr": 1.0996471806151638}, "model_output": [{"sum_logits": -1.0866485834121704, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.0866485834121704, "logits_per_char": -0.5433242917060852, "bits_per_byte": 0.7838512612394408, "num_chars": 2}, {"sum_logits": -1.0654484033584595, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.0654484033584595, "logits_per_char": -0.5327242016792297, "bits_per_byte": 0.7685585639247089, "num_chars": 2}, {"sum_logits": -1.5244346857070923, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.5244346857070923, "logits_per_char": -0.7622173428535461, "bits_per_byte": 1.0996471806151638, "num_chars": 2}, {"sum_logits": -2.3943495750427246, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -2.3943495750427246, "logits_per_char": -1.1971747875213623, "bits_per_byte": 1.7271581290356086, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 998, "native_id": "Mercury_SC_414339", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.452512502670288, "logits_per_token_corr": -1.452512502670288, "logits_per_char_corr": -0.726256251335144, "bits_per_byte_corr": 1.0477662922165736}, "model_output": [{"sum_logits": -1.186389684677124, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.186389684677124, "logits_per_char": -0.593194842338562, "bits_per_byte": 0.8557992573233688, "num_chars": 2}, {"sum_logits": -1.107023000717163, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.107023000717163, "logits_per_char": -0.5535115003585815, "bits_per_byte": 0.7985482966429089, "num_chars": 2}, {"sum_logits": -1.452512502670288, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.452512502670288, "logits_per_char": -0.726256251335144, "bits_per_byte": 1.0477662922165736, "num_chars": 2}, {"sum_logits": -2.1431596279144287, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -2.1431596279144287, "logits_per_char": -1.0715798139572144, "bits_per_byte": 1.5459628835139019, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 999, "native_id": "LEAP_2000_8_2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5868395566940308, "logits_per_token_corr": -1.5868395566940308, "logits_per_char_corr": -0.7934197783470154, "bits_per_byte_corr": 1.144662779565282}, "model_output": [{"sum_logits": -1.5868395566940308, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.5868395566940308, "logits_per_char": -0.7934197783470154, "bits_per_byte": 1.144662779565282, "num_chars": 2}, {"sum_logits": -1.0746935606002808, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": true, "logits_per_token": -1.0746935606002808, "logits_per_char": -0.5373467803001404, "bits_per_byte": 0.7752275351772208, "num_chars": 2}, {"sum_logits": -1.4543570280075073, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.4543570280075073, "logits_per_char": -0.7271785140037537, "bits_per_byte": 1.0490968359949748, "num_chars": 2}, {"sum_logits": -1.6075953245162964, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.6075953245162964, "logits_per_char": -0.8037976622581482, "bits_per_byte": 1.1596349012188054, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1000, "native_id": "Mercury_7172270", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2840921878814697, "logits_per_token_corr": -1.2840921878814697, "logits_per_char_corr": -0.6420460939407349, "bits_per_byte_corr": 0.9262767157510932}, "model_output": [{"sum_logits": -1.2840921878814697, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.2840921878814697, "logits_per_char": -0.6420460939407349, "bits_per_byte": 0.9262767157510932, "num_chars": 2}, {"sum_logits": -1.1372778415679932, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": true, "logits_per_token": -1.1372778415679932, "logits_per_char": -0.5686389207839966, "bits_per_byte": 0.8203725510721134, "num_chars": 2}, {"sum_logits": -1.3792688846588135, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.3792688846588135, "logits_per_char": -0.6896344423294067, "bits_per_byte": 0.9949321899755758, "num_chars": 2}, {"sum_logits": -1.9749987125396729, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.9749987125396729, "logits_per_char": -0.9874993562698364, "bits_per_byte": 1.4246604241725602, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1001, "native_id": "Mercury_184205", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.472420573234558, "logits_per_token_corr": -1.472420573234558, "logits_per_char_corr": -0.736210286617279, "bits_per_byte_corr": 1.062126929554954}, "model_output": [{"sum_logits": -1.2907761335372925, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.2907761335372925, "logits_per_char": -0.6453880667686462, "bits_per_byte": 0.93109816337671, "num_chars": 2}, {"sum_logits": -1.0438889265060425, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.0438889265060425, "logits_per_char": -0.5219444632530212, "bits_per_byte": 0.7530066887551266, "num_chars": 2}, {"sum_logits": -1.472420573234558, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.472420573234558, "logits_per_char": -0.736210286617279, "bits_per_byte": 1.062126929554954, "num_chars": 2}, {"sum_logits": -2.0665159225463867, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -2.0665159225463867, "logits_per_char": -1.0332579612731934, "bits_per_byte": 1.4906761366889476, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1002, "native_id": "Mercury_SC_400683", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4474531412124634, "logits_per_token_corr": -1.4474531412124634, "logits_per_char_corr": -0.7237265706062317, "bits_per_byte_corr": 1.0441167343739368}, "model_output": [{"sum_logits": -1.4474531412124634, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4474531412124634, "logits_per_char": -0.7237265706062317, "bits_per_byte": 1.0441167343739368, "num_chars": 2}, {"sum_logits": -0.9670511484146118, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -0.9670511484146118, "logits_per_char": -0.4835255742073059, "bits_per_byte": 0.6975799480523699, "num_chars": 2}, {"sum_logits": -1.3676401376724243, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.3676401376724243, "logits_per_char": -0.6838200688362122, "bits_per_byte": 0.9865438221710616, "num_chars": 2}, {"sum_logits": -2.120579242706299, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -2.120579242706299, "logits_per_char": -1.0602896213531494, "bits_per_byte": 1.5296745786333243, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1003, "native_id": "Mercury_7182210", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1262643337249756, "logits_per_token_corr": -1.1262643337249756, "logits_per_char_corr": -0.5631321668624878, "bits_per_byte_corr": 0.8124279844981511}, "model_output": [{"sum_logits": -1.53995680809021, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.53995680809021, "logits_per_char": -0.769978404045105, "bits_per_byte": 1.1108440251082696, "num_chars": 2}, {"sum_logits": -1.1262643337249756, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": true, "logits_per_token": -1.1262643337249756, "logits_per_char": -0.5631321668624878, "bits_per_byte": 0.8124279844981511, "num_chars": 2}, {"sum_logits": -1.3248765468597412, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.3248765468597412, "logits_per_char": -0.6624382734298706, "bits_per_byte": 0.9556964119730081, "num_chars": 2}, {"sum_logits": -1.694455862045288, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.694455862045288, "logits_per_char": -0.847227931022644, "bits_per_byte": 1.2222915345898635, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1004, "native_id": "Mercury_7238945", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5333720445632935, "logits_per_token_corr": -1.5333720445632935, "logits_per_char_corr": -0.7666860222816467, "bits_per_byte_corr": 1.1060941222654117}, "model_output": [{"sum_logits": -1.3678523302078247, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": true, "logits_per_token": -1.3678523302078247, "logits_per_char": -0.6839261651039124, "bits_per_byte": 0.9866968867303296, "num_chars": 2}, {"sum_logits": -1.393361210823059, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.393361210823059, "logits_per_char": -0.6966806054115295, "bits_per_byte": 1.0050976545114565, "num_chars": 2}, {"sum_logits": -1.5333720445632935, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.5333720445632935, "logits_per_char": -0.7666860222816467, "bits_per_byte": 1.1060941222654117, "num_chars": 2}, {"sum_logits": -2.265408515930176, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -2.265408515930176, "logits_per_char": -1.132704257965088, "bits_per_byte": 1.6341468157612196, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1005, "native_id": "Mercury_SC_408748", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.046069860458374, "logits_per_token_corr": -2.046069860458374, "logits_per_char_corr": -1.023034930229187, "bits_per_byte_corr": 1.4759274204988952}, "model_output": [{"sum_logits": -1.340575933456421, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.340575933456421, "logits_per_char": -0.6702879667282104, "bits_per_byte": 0.9670211255670305, "num_chars": 2}, {"sum_logits": -1.0219686031341553, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.0219686031341553, "logits_per_char": -0.5109843015670776, "bits_per_byte": 0.7371945178434631, "num_chars": 2}, {"sum_logits": -1.436004400253296, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.436004400253296, "logits_per_char": -0.718002200126648, "bits_per_byte": 1.0358582134708243, "num_chars": 2}, {"sum_logits": -2.046069860458374, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -2.046069860458374, "logits_per_char": -1.023034930229187, "bits_per_byte": 1.4759274204988952, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1006, "native_id": "MEA_2016_5_4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.0294361114501953, "logits_per_token_corr": -2.0294361114501953, "logits_per_char_corr": -1.0147180557250977, "bits_per_byte_corr": 1.4639287068961409}, "model_output": [{"sum_logits": -1.2996922731399536, "num_tokens": 1, "num_tokens_all": 439, "is_greedy": false, "logits_per_token": -1.2996922731399536, "logits_per_char": -0.6498461365699768, "bits_per_byte": 0.9375297985710311, "num_chars": 2}, {"sum_logits": -1.1035329103469849, "num_tokens": 1, "num_tokens_all": 439, "is_greedy": true, "logits_per_token": -1.1035329103469849, "logits_per_char": -0.5517664551734924, "bits_per_byte": 0.796030728608252, "num_chars": 2}, {"sum_logits": -1.3757935762405396, "num_tokens": 1, "num_tokens_all": 439, "is_greedy": false, "logits_per_token": -1.3757935762405396, "logits_per_char": -0.6878967881202698, "bits_per_byte": 0.9924252848652722, "num_chars": 2}, {"sum_logits": -2.0294361114501953, "num_tokens": 1, "num_tokens_all": 439, "is_greedy": false, "logits_per_token": -2.0294361114501953, "logits_per_char": -1.0147180557250977, "bits_per_byte": 1.4639287068961409, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1007, "native_id": "Mercury_7271513", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.695255160331726, "logits_per_token_corr": -1.695255160331726, "logits_per_char_corr": -0.847627580165863, "bits_per_byte_corr": 1.2228681064268814}, "model_output": [{"sum_logits": -1.5406454801559448, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.5406454801559448, "logits_per_char": -0.7703227400779724, "bits_per_byte": 1.111340796995287, "num_chars": 2}, {"sum_logits": -1.0711630582809448, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.0711630582809448, "logits_per_char": -0.5355815291404724, "bits_per_byte": 0.7726808160832425, "num_chars": 2}, {"sum_logits": -1.403671145439148, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.403671145439148, "logits_per_char": -0.701835572719574, "bits_per_byte": 1.0125347002827223, "num_chars": 2}, {"sum_logits": -1.695255160331726, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.695255160331726, "logits_per_char": -0.847627580165863, "bits_per_byte": 1.2228681064268814, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1008, "native_id": "Mercury_7189000", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.584176778793335, "logits_per_token_corr": -1.584176778793335, "logits_per_char_corr": -0.7920883893966675, "bits_per_byte_corr": 1.1427419913291192}, "model_output": [{"sum_logits": -1.584176778793335, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.584176778793335, "logits_per_char": -0.7920883893966675, "bits_per_byte": 1.1427419913291192, "num_chars": 2}, {"sum_logits": -1.069976568222046, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.069976568222046, "logits_per_char": -0.534988284111023, "bits_per_byte": 0.7718249444212232, "num_chars": 2}, {"sum_logits": -1.3465783596038818, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.3465783596038818, "logits_per_char": -0.6732891798019409, "bits_per_byte": 0.9713509607851556, "num_chars": 2}, {"sum_logits": -1.7468888759613037, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.7468888759613037, "logits_per_char": -0.8734444379806519, "bits_per_byte": 1.2601139591676396, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1009, "native_id": "Mercury_SC_401585", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3799957036972046, "logits_per_token_corr": -1.3799957036972046, "logits_per_char_corr": -0.6899978518486023, "bits_per_byte_corr": 0.9954564790867314}, "model_output": [{"sum_logits": -1.3799957036972046, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3799957036972046, "logits_per_char": -0.6899978518486023, "bits_per_byte": 0.9954564790867314, "num_chars": 2}, {"sum_logits": -1.2691196203231812, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.2691196203231812, "logits_per_char": -0.6345598101615906, "bits_per_byte": 0.9154762912682265, "num_chars": 2}, {"sum_logits": -1.3340648412704468, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3340648412704468, "logits_per_char": -0.6670324206352234, "bits_per_byte": 0.9623243653632892, "num_chars": 2}, {"sum_logits": -1.7245181798934937, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.7245181798934937, "logits_per_char": -0.8622590899467468, "bits_per_byte": 1.2439769130284961, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1010, "native_id": "Mercury_188528", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4255871772766113, "logits_per_token_corr": -1.4255871772766113, "logits_per_char_corr": -0.7127935886383057, "bits_per_byte_corr": 1.0283437755066702}, "model_output": [{"sum_logits": -1.4360642433166504, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4360642433166504, "logits_per_char": -0.7180321216583252, "bits_per_byte": 1.0359013811161908, "num_chars": 2}, {"sum_logits": -1.2555298805236816, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.2555298805236816, "logits_per_char": -0.6277649402618408, "bits_per_byte": 0.9056733661603648, "num_chars": 2}, {"sum_logits": -1.5279545783996582, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.5279545783996582, "logits_per_char": -0.7639772891998291, "bits_per_byte": 1.1021862464811787, "num_chars": 2}, {"sum_logits": -1.4255871772766113, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4255871772766113, "logits_per_char": -0.7127935886383057, "bits_per_byte": 1.0283437755066702, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1011, "native_id": "Mercury_SC_415719", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.451130986213684, "logits_per_token_corr": -1.451130986213684, "logits_per_char_corr": -0.725565493106842, "bits_per_byte_corr": 1.0467697387461483}, "model_output": [{"sum_logits": -1.451130986213684, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.451130986213684, "logits_per_char": -0.725565493106842, "bits_per_byte": 1.0467697387461483, "num_chars": 2}, {"sum_logits": -0.9772559404373169, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -0.9772559404373169, "logits_per_char": -0.48862797021865845, "bits_per_byte": 0.7049411494746051, "num_chars": 2}, {"sum_logits": -1.3847495317459106, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3847495317459106, "logits_per_char": -0.6923747658729553, "bits_per_byte": 0.9988856411622874, "num_chars": 2}, {"sum_logits": -2.0404977798461914, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -2.0404977798461914, "logits_per_char": -1.0202488899230957, "bits_per_byte": 1.4719080139655776, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1012, "native_id": "Mercury_SC_407072", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4839624166488647, "logits_per_token_corr": -1.4839624166488647, "logits_per_char_corr": -0.7419812083244324, "bits_per_byte_corr": 1.0704526096832285}, "model_output": [{"sum_logits": -1.4839624166488647, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4839624166488647, "logits_per_char": -0.7419812083244324, "bits_per_byte": 1.0704526096832285, "num_chars": 2}, {"sum_logits": -1.1894429922103882, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.1894429922103882, "logits_per_char": -0.5947214961051941, "bits_per_byte": 0.858001753141645, "num_chars": 2}, {"sum_logits": -1.3453649282455444, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3453649282455444, "logits_per_char": -0.6726824641227722, "bits_per_byte": 0.9704756550835888, "num_chars": 2}, {"sum_logits": -1.6173776388168335, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.6173776388168335, "logits_per_char": -0.8086888194084167, "bits_per_byte": 1.1666913493837114, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1013, "native_id": "Mercury_7091823", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3514840602874756, "logits_per_token_corr": -1.3514840602874756, "logits_per_char_corr": -0.6757420301437378, "bits_per_byte_corr": 0.9748896758093113}, "model_output": [{"sum_logits": -1.3514840602874756, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3514840602874756, "logits_per_char": -0.6757420301437378, "bits_per_byte": 0.9748896758093113, "num_chars": 2}, {"sum_logits": -1.2654378414154053, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.2654378414154053, "logits_per_char": -0.6327189207077026, "bits_per_byte": 0.9128204491822757, "num_chars": 2}, {"sum_logits": -1.386993169784546, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.386993169784546, "logits_per_char": -0.693496584892273, "bits_per_byte": 1.000504083898233, "num_chars": 2}, {"sum_logits": -1.6696641445159912, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.6696641445159912, "logits_per_char": -0.8348320722579956, "bits_per_byte": 1.2044080906225323, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1014, "native_id": "Mercury_7040985", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.07704496383667, "logits_per_token_corr": -1.07704496383667, "logits_per_char_corr": -0.538522481918335, "bits_per_byte_corr": 0.7769237140713565}, "model_output": [{"sum_logits": -1.582038402557373, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.582038402557373, "logits_per_char": -0.7910192012786865, "bits_per_byte": 1.1411994789335298, "num_chars": 2}, {"sum_logits": -1.07704496383667, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.07704496383667, "logits_per_char": -0.538522481918335, "bits_per_byte": 0.7769237140713565, "num_chars": 2}, {"sum_logits": -1.265002727508545, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.265002727508545, "logits_per_char": -0.6325013637542725, "bits_per_byte": 0.9125065808444508, "num_chars": 2}, {"sum_logits": -1.8219733238220215, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.8219733238220215, "logits_per_char": -0.9109866619110107, "bits_per_byte": 1.3142759394559502, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1015, "native_id": "Mercury_SC_409383", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.0326266288757324, "logits_per_token_corr": -2.0326266288757324, "logits_per_char_corr": -1.0163133144378662, "bits_per_byte_corr": 1.4662301787299887}, "model_output": [{"sum_logits": -1.2457059621810913, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.2457059621810913, "logits_per_char": -0.6228529810905457, "bits_per_byte": 0.8985869070228831, "num_chars": 2}, {"sum_logits": -1.1978486776351929, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.1978486776351929, "logits_per_char": -0.5989243388175964, "bits_per_byte": 0.8640651734804684, "num_chars": 2}, {"sum_logits": -1.3079560995101929, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3079560995101929, "logits_per_char": -0.6539780497550964, "bits_per_byte": 0.9434908892325913, "num_chars": 2}, {"sum_logits": -2.0326266288757324, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -2.0326266288757324, "logits_per_char": -1.0163133144378662, "bits_per_byte": 1.4662301787299887, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1016, "native_id": "Mercury_SC_407080", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8006045818328857, "logits_per_token_corr": -1.8006045818328857, "logits_per_char_corr": -0.9003022909164429, "bits_per_byte_corr": 1.298861650407058}, "model_output": [{"sum_logits": -1.4493377208709717, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4493377208709717, "logits_per_char": -0.7246688604354858, "bits_per_byte": 1.0454761712376828, "num_chars": 2}, {"sum_logits": -1.1067397594451904, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.1067397594451904, "logits_per_char": -0.5533698797225952, "bits_per_byte": 0.7983439812536838, "num_chars": 2}, {"sum_logits": -1.3634192943572998, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3634192943572998, "logits_per_char": -0.6817096471786499, "bits_per_byte": 0.9834991273115097, "num_chars": 2}, {"sum_logits": -1.8006045818328857, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.8006045818328857, "logits_per_char": -0.9003022909164429, "bits_per_byte": 1.298861650407058, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1017, "native_id": "MCAS_2000_4_34", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2897617816925049, "logits_per_token_corr": -1.2897617816925049, "logits_per_char_corr": -0.6448808908462524, "bits_per_byte_corr": 0.9303664631886137}, "model_output": [{"sum_logits": -1.2897617816925049, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.2897617816925049, "logits_per_char": -0.6448808908462524, "bits_per_byte": 0.9303664631886137, "num_chars": 2}, {"sum_logits": -1.043208360671997, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.043208360671997, "logits_per_char": -0.5216041803359985, "bits_per_byte": 0.7525157642782383, "num_chars": 2}, {"sum_logits": -1.387345552444458, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.387345552444458, "logits_per_char": -0.693672776222229, "bits_per_byte": 1.0007582742562084, "num_chars": 2}, {"sum_logits": -2.1780121326446533, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -2.1780121326446533, "logits_per_char": -1.0890060663223267, "bits_per_byte": 1.571103651382347, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1018, "native_id": "Mercury_7032498", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.0418875217437744, "logits_per_token_corr": -2.0418875217437744, "logits_per_char_corr": -1.0209437608718872, "bits_per_byte_corr": 1.4729105008374577}, "model_output": [{"sum_logits": -1.0863687992095947, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.0863687992095947, "logits_per_char": -0.5431843996047974, "bits_per_byte": 0.7836494395986532, "num_chars": 2}, {"sum_logits": -1.2053911685943604, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.2053911685943604, "logits_per_char": -0.6026955842971802, "bits_per_byte": 0.8695059306318428, "num_chars": 2}, {"sum_logits": -1.5070078372955322, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.5070078372955322, "logits_per_char": -0.7535039186477661, "bits_per_byte": 1.0870763667243142, "num_chars": 2}, {"sum_logits": -2.0418875217437744, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -2.0418875217437744, "logits_per_char": -1.0209437608718872, "bits_per_byte": 1.4729105008374577, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1019, "native_id": "TAKS_2009_5_30", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.124659776687622, "logits_per_token_corr": -1.124659776687622, "logits_per_char_corr": -0.562329888343811, "bits_per_byte_corr": 0.8112705412578435}, "model_output": [{"sum_logits": -1.124659776687622, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.124659776687622, "logits_per_char": -0.562329888343811, "bits_per_byte": 0.8112705412578435, "num_chars": 2}, {"sum_logits": -1.2082722187042236, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.2082722187042236, "logits_per_char": -0.6041361093521118, "bits_per_byte": 0.8715841689848705, "num_chars": 2}, {"sum_logits": -1.5479633808135986, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.5479633808135986, "logits_per_char": -0.7739816904067993, "bits_per_byte": 1.1166195464895485, "num_chars": 2}, {"sum_logits": -1.9199230670928955, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.9199230670928955, "logits_per_char": -0.9599615335464478, "bits_per_byte": 1.3849317438926194, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1020, "native_id": "Mercury_SC_415761", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4210859537124634, "logits_per_token_corr": -1.4210859537124634, "logits_per_char_corr": -0.7105429768562317, "bits_per_byte_corr": 1.0250968290497033}, "model_output": [{"sum_logits": -1.2118440866470337, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.2118440866470337, "logits_per_char": -0.6059220433235168, "bits_per_byte": 0.8741607270687735, "num_chars": 2}, {"sum_logits": -1.2273050546646118, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.2273050546646118, "logits_per_char": -0.6136525273323059, "bits_per_byte": 0.8853134580119328, "num_chars": 2}, {"sum_logits": -1.4210859537124634, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4210859537124634, "logits_per_char": -0.7105429768562317, "bits_per_byte": 1.0250968290497033, "num_chars": 2}, {"sum_logits": -1.8494306802749634, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.8494306802749634, "logits_per_char": -0.9247153401374817, "bits_per_byte": 1.3340822354512543, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1021, "native_id": "ACTAAP_2008_5_10", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.0031042098999023, "logits_per_token_corr": -2.0031042098999023, "logits_per_char_corr": -1.0015521049499512, "bits_per_byte_corr": 1.4449342550042352}, "model_output": [{"sum_logits": -1.1491881608963013, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.1491881608963013, "logits_per_char": -0.5745940804481506, "bits_per_byte": 0.8289640303872966, "num_chars": 2}, {"sum_logits": -1.154358983039856, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.154358983039856, "logits_per_char": -0.577179491519928, "bits_per_byte": 0.8326939901192119, "num_chars": 2}, {"sum_logits": -1.49640953540802, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.49640953540802, "logits_per_char": -0.74820476770401, "bits_per_byte": 1.0794313079368296, "num_chars": 2}, {"sum_logits": -2.0031042098999023, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -2.0031042098999023, "logits_per_char": -1.0015521049499512, "bits_per_byte": 1.4449342550042352, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1022, "native_id": "Mercury_416671", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.514430284500122, "logits_per_token_corr": -1.514430284500122, "logits_per_char_corr": -0.757215142250061, "bits_per_byte_corr": 1.092430530610979}, "model_output": [{"sum_logits": -1.6846020221710205, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.6846020221710205, "logits_per_char": -0.8423010110855103, "bits_per_byte": 1.2151834916296984, "num_chars": 2}, {"sum_logits": -1.3094685077667236, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3094685077667236, "logits_per_char": -0.6547342538833618, "bits_per_byte": 0.9445818611783402, "num_chars": 2}, {"sum_logits": -1.1600048542022705, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.1600048542022705, "logits_per_char": -0.5800024271011353, "bits_per_byte": 0.8367666252829715, "num_chars": 2}, {"sum_logits": -1.514430284500122, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.514430284500122, "logits_per_char": -0.757215142250061, "bits_per_byte": 1.092430530610979, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1023, "native_id": "Mercury_400803", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4592598676681519, "logits_per_token_corr": -1.4592598676681519, "logits_per_char_corr": -0.7296299338340759, "bits_per_byte_corr": 1.05263348722732}, "model_output": [{"sum_logits": -1.6733120679855347, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.6733120679855347, "logits_per_char": -0.8366560339927673, "bits_per_byte": 1.2070395111720607, "num_chars": 2}, {"sum_logits": -1.4183911085128784, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.4183911085128784, "logits_per_char": -0.7091955542564392, "bits_per_byte": 1.0231529091469997, "num_chars": 2}, {"sum_logits": -1.8400505781173706, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.8400505781173706, "logits_per_char": -0.9200252890586853, "bits_per_byte": 1.327315922018354, "num_chars": 2}, {"sum_logits": -1.4592598676681519, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4592598676681519, "logits_per_char": -0.7296299338340759, "bits_per_byte": 1.05263348722732, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1024, "native_id": "Mercury_7005880", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3685156106948853, "logits_per_token_corr": -1.3685156106948853, "logits_per_char_corr": -0.6842578053474426, "bits_per_byte_corr": 0.9871753424650304}, "model_output": [{"sum_logits": -1.2801405191421509, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.2801405191421509, "logits_per_char": -0.6400702595710754, "bits_per_byte": 0.9234261893043655, "num_chars": 2}, {"sum_logits": -1.083290696144104, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.083290696144104, "logits_per_char": -0.541645348072052, "bits_per_byte": 0.7814290575846873, "num_chars": 2}, {"sum_logits": -1.3685156106948853, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3685156106948853, "logits_per_char": -0.6842578053474426, "bits_per_byte": 0.9871753424650304, "num_chars": 2}, {"sum_logits": -2.1295933723449707, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -2.1295933723449707, "logits_per_char": -1.0647966861724854, "bits_per_byte": 1.5361768986971502, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1025, "native_id": "Mercury_7210508", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4046049118041992, "logits_per_token_corr": -1.4046049118041992, "logits_per_char_corr": -0.7023024559020996, "bits_per_byte_corr": 1.0132082703348269}, "model_output": [{"sum_logits": -1.3301191329956055, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.3301191329956055, "logits_per_char": -0.6650595664978027, "bits_per_byte": 0.9594781384828331, "num_chars": 2}, {"sum_logits": -1.0798559188842773, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": true, "logits_per_token": -1.0798559188842773, "logits_per_char": -0.5399279594421387, "bits_per_byte": 0.7789513895250304, "num_chars": 2}, {"sum_logits": -1.4046049118041992, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.4046049118041992, "logits_per_char": -0.7023024559020996, "bits_per_byte": 1.0132082703348269, "num_chars": 2}, {"sum_logits": -1.9889764785766602, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.9889764785766602, "logits_per_char": -0.9944882392883301, "bits_per_byte": 1.4347432510447014, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1026, "native_id": "NYSEDREGENTS_2013_4_1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5010124444961548, "logits_per_token_corr": -1.5010124444961548, "logits_per_char_corr": -0.7505062222480774, "bits_per_byte_corr": 1.0827516049943893}, "model_output": [{"sum_logits": -1.192844033241272, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.192844033241272, "logits_per_char": -0.596422016620636, "bits_per_byte": 0.8604550856562047, "num_chars": 2}, {"sum_logits": -1.1554638147354126, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.1554638147354126, "logits_per_char": -0.5777319073677063, "bits_per_byte": 0.8334909577233107, "num_chars": 2}, {"sum_logits": -1.5010124444961548, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.5010124444961548, "logits_per_char": -0.7505062222480774, "bits_per_byte": 1.0827516049943893, "num_chars": 2}, {"sum_logits": -1.9268871545791626, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.9268871545791626, "logits_per_char": -0.9634435772895813, "bits_per_byte": 1.3899552711330003, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1027, "native_id": "NYSEDREGENTS_2008_4_12", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1458054780960083, "logits_per_token_corr": -1.1458054780960083, "logits_per_char_corr": -0.5729027390480042, "bits_per_byte_corr": 0.8265239405368533}, "model_output": [{"sum_logits": -1.602047085762024, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.602047085762024, "logits_per_char": -0.801023542881012, "bits_per_byte": 1.155632692950574, "num_chars": 2}, {"sum_logits": -1.225265622138977, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.225265622138977, "logits_per_char": -0.6126328110694885, "bits_per_byte": 0.8838423184164513, "num_chars": 2}, {"sum_logits": -1.1458054780960083, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.1458054780960083, "logits_per_char": -0.5729027390480042, "bits_per_byte": 0.8265239405368533, "num_chars": 2}, {"sum_logits": -1.7710000276565552, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.7710000276565552, "logits_per_char": -0.8855000138282776, "bits_per_byte": 1.2775064786580823, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1028, "native_id": "Mercury_400091", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.399937629699707, "logits_per_token_corr": -1.399937629699707, "logits_per_char_corr": -0.6999688148498535, "bits_per_byte_corr": 1.0098415379615342}, "model_output": [{"sum_logits": -1.2513952255249023, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.2513952255249023, "logits_per_char": -0.6256976127624512, "bits_per_byte": 0.9026908430290999, "num_chars": 2}, {"sum_logits": -1.399937629699707, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.399937629699707, "logits_per_char": -0.6999688148498535, "bits_per_byte": 1.0098415379615342, "num_chars": 2}, {"sum_logits": -1.7009553909301758, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.7009553909301758, "logits_per_char": -0.8504776954650879, "bits_per_byte": 1.2269799536350379, "num_chars": 2}, {"sum_logits": -2.0286436080932617, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -2.0286436080932617, "logits_per_char": -1.0143218040466309, "bits_per_byte": 1.4633570365646726, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1029, "native_id": "Mercury_SC_402257", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7387007474899292, "logits_per_token_corr": -1.7387007474899292, "logits_per_char_corr": -0.8693503737449646, "bits_per_byte_corr": 1.2542074729977284}, "model_output": [{"sum_logits": -1.4732435941696167, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4732435941696167, "logits_per_char": -0.7366217970848083, "bits_per_byte": 1.0627206136657328, "num_chars": 2}, {"sum_logits": -1.2103575468063354, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.2103575468063354, "logits_per_char": -0.6051787734031677, "bits_per_byte": 0.8730884152406431, "num_chars": 2}, {"sum_logits": -1.2887934446334839, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.2887934446334839, "logits_per_char": -0.6443967223167419, "bits_per_byte": 0.9296679556521339, "num_chars": 2}, {"sum_logits": -1.7387007474899292, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.7387007474899292, "logits_per_char": -0.8693503737449646, "bits_per_byte": 1.2542074729977284, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1030, "native_id": "Mercury_7227815", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.1603689193725586, "logits_per_token_corr": -2.1603689193725586, "logits_per_char_corr": -1.0801844596862793, "bits_per_byte_corr": 1.5583767632358392}, "model_output": [{"sum_logits": -0.960774302482605, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -0.960774302482605, "logits_per_char": -0.4803871512413025, "bits_per_byte": 0.6930521608031015, "num_chars": 2}, {"sum_logits": -1.340290904045105, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.340290904045105, "logits_per_char": -0.6701454520225525, "bits_per_byte": 0.9668155203079238, "num_chars": 2}, {"sum_logits": -1.502787470817566, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.502787470817566, "logits_per_char": -0.751393735408783, "bits_per_byte": 1.0840320158300638, "num_chars": 2}, {"sum_logits": -2.1603689193725586, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -2.1603689193725586, "logits_per_char": -1.0801844596862793, "bits_per_byte": 1.5583767632358392, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1031, "native_id": "ACTAAP_2010_7_3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.511162519454956, "logits_per_token_corr": -1.511162519454956, "logits_per_char_corr": -0.755581259727478, "bits_per_byte_corr": 1.0900733363982515}, "model_output": [{"sum_logits": -1.511162519454956, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.511162519454956, "logits_per_char": -0.755581259727478, "bits_per_byte": 1.0900733363982515, "num_chars": 2}, {"sum_logits": -1.0782439708709717, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.0782439708709717, "logits_per_char": -0.5391219854354858, "bits_per_byte": 0.7777886148225461, "num_chars": 2}, {"sum_logits": -1.2534377574920654, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.2534377574920654, "logits_per_char": -0.6267188787460327, "bits_per_byte": 0.9041642183990426, "num_chars": 2}, {"sum_logits": -1.9372851848602295, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.9372851848602295, "logits_per_char": -0.9686425924301147, "bits_per_byte": 1.3974558644937598, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1032, "native_id": "Mercury_SC_410905", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6026084423065186, "logits_per_token_corr": -1.6026084423065186, "logits_per_char_corr": -0.8013042211532593, "bits_per_byte_corr": 1.1560376261020309}, "model_output": [{"sum_logits": -1.4140956401824951, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.4140956401824951, "logits_per_char": -0.7070478200912476, "bits_per_byte": 1.0200543837177276, "num_chars": 2}, {"sum_logits": -1.214257001876831, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": true, "logits_per_token": -1.214257001876831, "logits_per_char": -0.6071285009384155, "bits_per_byte": 0.8759012774868318, "num_chars": 2}, {"sum_logits": -1.425551176071167, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.425551176071167, "logits_per_char": -0.7127755880355835, "bits_per_byte": 1.0283178061263898, "num_chars": 2}, {"sum_logits": -1.6026084423065186, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.6026084423065186, "logits_per_char": -0.8013042211532593, "bits_per_byte": 1.1560376261020309, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1033, "native_id": "OHAT_2010_5_18", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6983364820480347, "logits_per_token_corr": -1.6983364820480347, "logits_per_char_corr": -0.8491682410240173, "bits_per_byte_corr": 1.225090810206634}, "model_output": [{"sum_logits": -1.469981074333191, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.469981074333191, "logits_per_char": -0.7349905371665955, "bits_per_byte": 1.0603672030713245, "num_chars": 2}, {"sum_logits": -1.24437415599823, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": true, "logits_per_token": -1.24437415599823, "logits_per_char": -0.622187077999115, "bits_per_byte": 0.8976262119351628, "num_chars": 2}, {"sum_logits": -1.2771881818771362, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.2771881818771362, "logits_per_char": -0.6385940909385681, "bits_per_byte": 0.9212965281387299, "num_chars": 2}, {"sum_logits": -1.6983364820480347, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.6983364820480347, "logits_per_char": -0.8491682410240173, "bits_per_byte": 1.225090810206634, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1034, "native_id": "NAEP_2000_8_S11+10", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0820066928863525, "logits_per_token_corr": -1.0820066928863525, "logits_per_char_corr": -0.5410033464431763, "bits_per_byte_corr": 0.780502845018465}, "model_output": [{"sum_logits": -1.0820066928863525, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.0820066928863525, "logits_per_char": -0.5410033464431763, "bits_per_byte": 0.780502845018465, "num_chars": 2}, {"sum_logits": -1.3228800296783447, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3228800296783447, "logits_per_char": -0.6614400148391724, "bits_per_byte": 0.9542562292546819, "num_chars": 2}, {"sum_logits": -1.53615403175354, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.53615403175354, "logits_per_char": -0.76807701587677, "bits_per_byte": 1.1081009018270058, "num_chars": 2}, {"sum_logits": -1.8760302066802979, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.8760302066802979, "logits_per_char": -0.9380151033401489, "bits_per_byte": 1.3532697378687537, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1035, "native_id": "MCAS_2003_8_29", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2288028001785278, "logits_per_token_corr": -1.2288028001785278, "logits_per_char_corr": -0.6144014000892639, "bits_per_byte_corr": 0.8863938530246538}, "model_output": [{"sum_logits": -1.6852294206619263, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.6852294206619263, "logits_per_char": -0.8426147103309631, "bits_per_byte": 1.2156360639754442, "num_chars": 2}, {"sum_logits": -1.2288028001785278, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.2288028001785278, "logits_per_char": -0.6144014000892639, "bits_per_byte": 0.8863938530246538, "num_chars": 2}, {"sum_logits": -1.194733738899231, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.194733738899231, "logits_per_char": -0.5973668694496155, "bits_per_byte": 0.8618182201469442, "num_chars": 2}, {"sum_logits": -1.655842900276184, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.655842900276184, "logits_per_char": -0.827921450138092, "bits_per_byte": 1.1944381703606828, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1036, "native_id": "Mercury_401433", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3253766298294067, "logits_per_token_corr": -1.3253766298294067, "logits_per_char_corr": -0.6626883149147034, "bits_per_byte_corr": 0.9560571455831931}, "model_output": [{"sum_logits": -1.4444981813430786, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4444981813430786, "logits_per_char": -0.7222490906715393, "bits_per_byte": 1.0419851813991416, "num_chars": 2}, {"sum_logits": -1.3815232515335083, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3815232515335083, "logits_per_char": -0.6907616257667542, "bits_per_byte": 0.9965583719308102, "num_chars": 2}, {"sum_logits": -1.4842416048049927, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4842416048049927, "logits_per_char": -0.7421208024024963, "bits_per_byte": 1.070654001367389, "num_chars": 2}, {"sum_logits": -1.3253766298294067, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.3253766298294067, "logits_per_char": -0.6626883149147034, "bits_per_byte": 0.9560571455831931, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1037, "native_id": "TIMSS_1995_8_N4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2599825859069824, "logits_per_token_corr": -1.2599825859069824, "logits_per_char_corr": -0.6299912929534912, "bits_per_byte_corr": 0.9088853141478809}, "model_output": [{"sum_logits": -1.1551432609558105, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.1551432609558105, "logits_per_char": -0.5775716304779053, "bits_per_byte": 0.8332597270492255, "num_chars": 2}, {"sum_logits": -1.2599825859069824, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.2599825859069824, "logits_per_char": -0.6299912929534912, "bits_per_byte": 0.9088853141478809, "num_chars": 2}, {"sum_logits": -1.6998381614685059, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.6998381614685059, "logits_per_char": -0.8499190807342529, "bits_per_byte": 1.226174042933094, "num_chars": 2}, {"sum_logits": -1.7147841453552246, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.7147841453552246, "logits_per_char": -0.8573920726776123, "bits_per_byte": 1.2369552913503896, "num_chars": 2}, {"sum_logits": -3.7471938133239746, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -3.7471938133239746, "logits_per_char": -1.8735969066619873, "bits_per_byte": 2.7030289658680933, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1038, "native_id": "Mercury_SC_405885", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3857630491256714, "logits_per_token_corr": -1.3857630491256714, "logits_per_char_corr": -0.6928815245628357, "bits_per_byte_corr": 0.9996167394111057}, "model_output": [{"sum_logits": -1.1424940824508667, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.1424940824508667, "logits_per_char": -0.5712470412254333, "bits_per_byte": 0.824135273499018, "num_chars": 2}, {"sum_logits": -1.2700365781784058, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.2700365781784058, "logits_per_char": -0.6350182890892029, "bits_per_byte": 0.9161377365434453, "num_chars": 2}, {"sum_logits": -1.3857630491256714, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3857630491256714, "logits_per_char": -0.6928815245628357, "bits_per_byte": 0.9996167394111057, "num_chars": 2}, {"sum_logits": -1.9902266263961792, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.9902266263961792, "logits_per_char": -0.9951133131980896, "bits_per_byte": 1.435645042074501, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1039, "native_id": "Mercury_7263638", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0435367822647095, "logits_per_token_corr": -1.0435367822647095, "logits_per_char_corr": -0.5217683911323547, "bits_per_byte_corr": 0.752752670379802}, "model_output": [{"sum_logits": -1.3801189661026, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.3801189661026, "logits_per_char": -0.6900594830513, "bits_per_byte": 0.9955453941172275, "num_chars": 2}, {"sum_logits": -1.0435367822647095, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.0435367822647095, "logits_per_char": -0.5217683911323547, "bits_per_byte": 0.752752670379802, "num_chars": 2}, {"sum_logits": -1.5251575708389282, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.5251575708389282, "logits_per_char": -0.7625787854194641, "bits_per_byte": 1.1001686320125803, "num_chars": 2}, {"sum_logits": -1.833722710609436, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.833722710609436, "logits_per_char": -0.916861355304718, "bits_per_byte": 1.3227513304818008, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1040, "native_id": "Mercury_401428", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0188043117523193, "logits_per_token_corr": -1.0188043117523193, "logits_per_char_corr": -0.5094021558761597, "bits_per_byte_corr": 0.7349119641012103}, "model_output": [{"sum_logits": -1.2585694789886475, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.2585694789886475, "logits_per_char": -0.6292847394943237, "bits_per_byte": 0.9078659729762163, "num_chars": 2}, {"sum_logits": -1.0188043117523193, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.0188043117523193, "logits_per_char": -0.5094021558761597, "bits_per_byte": 0.7349119641012103, "num_chars": 2}, {"sum_logits": -1.4727609157562256, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4727609157562256, "logits_per_char": -0.7363804578781128, "bits_per_byte": 1.0623724347890608, "num_chars": 2}, {"sum_logits": -2.155364751815796, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -2.155364751815796, "logits_per_char": -1.077682375907898, "bits_per_byte": 1.554767019376877, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1041, "native_id": "Mercury_SC_402121", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0747504234313965, "logits_per_token_corr": -1.0747504234313965, "logits_per_char_corr": -0.5373752117156982, "bits_per_byte_corr": 0.7752685530394516}, "model_output": [{"sum_logits": -1.2925734519958496, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.2925734519958496, "logits_per_char": -0.6462867259979248, "bits_per_byte": 0.9323946545902403, "num_chars": 2}, {"sum_logits": -1.0747504234313965, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -1.0747504234313965, "logits_per_char": -0.5373752117156982, "bits_per_byte": 0.7752685530394516, "num_chars": 2}, {"sum_logits": -1.497220516204834, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.497220516204834, "logits_per_char": -0.748610258102417, "bits_per_byte": 1.0800163069237398, "num_chars": 2}, {"sum_logits": -1.91127347946167, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.91127347946167, "logits_per_char": -0.955636739730835, "bits_per_byte": 1.378692385301963, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1042, "native_id": "NYSEDREGENTS_2015_4_7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1802338361740112, "logits_per_token_corr": -1.1802338361740112, "logits_per_char_corr": -0.5901169180870056, "bits_per_byte_corr": 0.8513587512694133}, "model_output": [{"sum_logits": -1.1802338361740112, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.1802338361740112, "logits_per_char": -0.5901169180870056, "bits_per_byte": 0.8513587512694133, "num_chars": 2}, {"sum_logits": -1.0928486585617065, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.0928486585617065, "logits_per_char": -0.5464243292808533, "bits_per_byte": 0.7883236700751314, "num_chars": 2}, {"sum_logits": -1.6145180463790894, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.6145180463790894, "logits_per_char": -0.8072590231895447, "bits_per_byte": 1.1646285894692616, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1043, "native_id": "MCAS_2012_5_23614", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.225932240486145, "logits_per_token_corr": -1.225932240486145, "logits_per_char_corr": -0.6129661202430725, "bits_per_byte_corr": 0.8843231819082641}, "model_output": [{"sum_logits": -1.3932486772537231, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3932486772537231, "logits_per_char": -0.6966243386268616, "bits_per_byte": 1.0050164787002491, "num_chars": 2}, {"sum_logits": -1.2060195207595825, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.2060195207595825, "logits_per_char": -0.6030097603797913, "bits_per_byte": 0.869959190908192, "num_chars": 2}, {"sum_logits": -1.225932240486145, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.225932240486145, "logits_per_char": -0.6129661202430725, "bits_per_byte": 0.8843231819082641, "num_chars": 2}, {"sum_logits": -1.910187840461731, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.910187840461731, "logits_per_char": -0.9550939202308655, "bits_per_byte": 1.3779092623012588, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1044, "native_id": "Mercury_407262", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3486669063568115, "logits_per_token_corr": -1.3486669063568115, "logits_per_char_corr": -0.6743334531784058, "bits_per_byte_corr": 0.9728575288067149}, "model_output": [{"sum_logits": -1.3486669063568115, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3486669063568115, "logits_per_char": -0.6743334531784058, "bits_per_byte": 0.9728575288067149, "num_chars": 2}, {"sum_logits": -1.216338872909546, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.216338872909546, "logits_per_char": -0.608169436454773, "bits_per_byte": 0.8774030299941669, "num_chars": 2}, {"sum_logits": -1.4243981838226318, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4243981838226318, "logits_per_char": -0.7121990919113159, "bits_per_byte": 1.0274860980268168, "num_chars": 2}, {"sum_logits": -1.6655690670013428, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.6655690670013428, "logits_per_char": -0.8327845335006714, "bits_per_byte": 1.2014541166113106, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1045, "native_id": "MCAS_2014_8_6", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.8859251737594604, "logits_per_token_corr": -0.8859251737594604, "logits_per_char_corr": -0.4429625868797302, "bits_per_byte_corr": 0.6390599273911926}, "model_output": [{"sum_logits": -1.4414976835250854, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.4414976835250854, "logits_per_char": -0.7207488417625427, "bits_per_byte": 1.0398207797380317, "num_chars": 2}, {"sum_logits": -0.8859251737594604, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": true, "logits_per_token": -0.8859251737594604, "logits_per_char": -0.4429625868797302, "bits_per_byte": 0.6390599273911926, "num_chars": 2}, {"sum_logits": -1.5628660917282104, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.5628660917282104, "logits_per_char": -0.7814330458641052, "bits_per_byte": 1.1273695800557124, "num_chars": 2}, {"sum_logits": -2.032052993774414, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -2.032052993774414, "logits_per_char": -1.016026496887207, "bits_per_byte": 1.4658163884720126, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1046, "native_id": "Mercury_7032515", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.725490927696228, "logits_per_token_corr": -1.725490927696228, "logits_per_char_corr": -0.862745463848114, "bits_per_byte_corr": 1.2446786022440168}, "model_output": [{"sum_logits": -1.2429279088974, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.2429279088974, "logits_per_char": -0.6214639544487, "bits_per_byte": 0.8965829651750282, "num_chars": 2}, {"sum_logits": -1.1635135412216187, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.1635135412216187, "logits_per_char": -0.5817567706108093, "bits_per_byte": 0.8392976079643959, "num_chars": 2}, {"sum_logits": -1.5927642583847046, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5927642583847046, "logits_per_char": -0.7963821291923523, "bits_per_byte": 1.1489365484392258, "num_chars": 2}, {"sum_logits": -1.725490927696228, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.725490927696228, "logits_per_char": -0.862745463848114, "bits_per_byte": 1.2446786022440168, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1047, "native_id": "Mercury_7270165", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3784387111663818, "logits_per_token_corr": -1.3784387111663818, "logits_per_char_corr": -0.6892193555831909, "bits_per_byte_corr": 0.994333346385271}, "model_output": [{"sum_logits": -1.4258172512054443, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.4258172512054443, "logits_per_char": -0.7129086256027222, "bits_per_byte": 1.0285097387647528, "num_chars": 2}, {"sum_logits": -0.9839398264884949, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": true, "logits_per_token": -0.9839398264884949, "logits_per_char": -0.49196991324424744, "bits_per_byte": 0.7097625541045592, "num_chars": 2}, {"sum_logits": -1.3784387111663818, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.3784387111663818, "logits_per_char": -0.6892193555831909, "bits_per_byte": 0.994333346385271, "num_chars": 2}, {"sum_logits": -2.083698034286499, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -2.083698034286499, "logits_per_char": -1.0418490171432495, "bits_per_byte": 1.5030704103886865, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1048, "native_id": "Mercury_7017045", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.286842703819275, "logits_per_token_corr": -1.286842703819275, "logits_per_char_corr": -0.6434213519096375, "bits_per_byte_corr": 0.9282607936027734}, "model_output": [{"sum_logits": -1.436235785484314, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.436235785484314, "logits_per_char": -0.718117892742157, "bits_per_byte": 1.0360251226334867, "num_chars": 2}, {"sum_logits": -1.286842703819275, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.286842703819275, "logits_per_char": -0.6434213519096375, "bits_per_byte": 0.9282607936027734, "num_chars": 2}, {"sum_logits": -1.4274734258651733, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4274734258651733, "logits_per_char": -0.7137367129325867, "bits_per_byte": 1.0297044162489721, "num_chars": 2}, {"sum_logits": -1.5469764471054077, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.5469764471054077, "logits_per_char": -0.7734882235527039, "bits_per_byte": 1.1159076243063015, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1049, "native_id": "Mercury_SC_400386", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3809281587600708, "logits_per_token_corr": -1.3809281587600708, "logits_per_char_corr": -0.6904640793800354, "bits_per_byte_corr": 0.9961291032342563}, "model_output": [{"sum_logits": -1.3809281587600708, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.3809281587600708, "logits_per_char": -0.6904640793800354, "bits_per_byte": 0.9961291032342563, "num_chars": 2}, {"sum_logits": -0.9728356599807739, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -0.9728356599807739, "logits_per_char": -0.48641782999038696, "bits_per_byte": 0.7017525911276064, "num_chars": 2}, {"sum_logits": -1.4518414735794067, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4518414735794067, "logits_per_char": -0.7259207367897034, "bits_per_byte": 1.04728224704572, "num_chars": 2}, {"sum_logits": -2.0866432189941406, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -2.0866432189941406, "logits_per_char": -1.0433216094970703, "bits_per_byte": 1.5051949120747963, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1050, "native_id": "Mercury_400750", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7908599376678467, "logits_per_token_corr": -1.7908599376678467, "logits_per_char_corr": -0.8954299688339233, "bits_per_byte_corr": 1.2918323755009884}, "model_output": [{"sum_logits": -1.6507556438446045, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.6507556438446045, "logits_per_char": -0.8253778219223022, "bits_per_byte": 1.1907684905478948, "num_chars": 2}, {"sum_logits": -1.4386413097381592, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.4386413097381592, "logits_per_char": -0.7193206548690796, "bits_per_byte": 1.0377603415893684, "num_chars": 2}, {"sum_logits": -1.5246703624725342, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.5246703624725342, "logits_per_char": -0.7623351812362671, "bits_per_byte": 1.0998171854655419, "num_chars": 2}, {"sum_logits": -1.7908599376678467, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.7908599376678467, "logits_per_char": -0.8954299688339233, "bits_per_byte": 1.2918323755009884, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1051, "native_id": "MCAS_2006_9_28-v1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3926935195922852, "logits_per_token_corr": -1.3926935195922852, "logits_per_char_corr": -0.6963467597961426, "bits_per_byte_corr": 1.0046160170977148}, "model_output": [{"sum_logits": -1.4959383010864258, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -1.4959383010864258, "logits_per_char": -0.7479691505432129, "bits_per_byte": 1.079091384227399, "num_chars": 2}, {"sum_logits": -0.9136552214622498, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": true, "logits_per_token": -0.9136552214622498, "logits_per_char": -0.4568276107311249, "bits_per_byte": 0.6590629285434212, "num_chars": 2}, {"sum_logits": -1.3926935195922852, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -1.3926935195922852, "logits_per_char": -0.6963467597961426, "bits_per_byte": 1.0046160170977148, "num_chars": 2}, {"sum_logits": -2.132472038269043, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -2.132472038269043, "logits_per_char": -1.0662360191345215, "bits_per_byte": 1.5382534172236693, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1052, "native_id": "Mercury_416376", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7452622652053833, "logits_per_token_corr": -1.7452622652053833, "logits_per_char_corr": -0.8726311326026917, "bits_per_byte_corr": 1.2589406075321272}, "model_output": [{"sum_logits": -1.385769009590149, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.385769009590149, "logits_per_char": -0.6928845047950745, "bits_per_byte": 0.9996210389773773, "num_chars": 2}, {"sum_logits": -1.226741909980774, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.226741909980774, "logits_per_char": -0.613370954990387, "bits_per_byte": 0.8849072349905946, "num_chars": 2}, {"sum_logits": -1.3308106660842896, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3308106660842896, "logits_per_char": -0.6654053330421448, "bits_per_byte": 0.9599769741616611, "num_chars": 2}, {"sum_logits": -1.7452622652053833, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.7452622652053833, "logits_per_char": -0.8726311326026917, "bits_per_byte": 1.2589406075321272, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1053, "native_id": "Mercury_7086520", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.357635736465454, "logits_per_token_corr": -1.357635736465454, "logits_per_char_corr": -0.678817868232727, "bits_per_byte_corr": 0.9793271721668767}, "model_output": [{"sum_logits": -1.357635736465454, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.357635736465454, "logits_per_char": -0.678817868232727, "bits_per_byte": 0.9793271721668767, "num_chars": 2}, {"sum_logits": -1.2713987827301025, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.2713987827301025, "logits_per_char": -0.6356993913650513, "bits_per_byte": 0.9171203594191507, "num_chars": 2}, {"sum_logits": -1.2496302127838135, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.2496302127838135, "logits_per_char": -0.6248151063919067, "bits_per_byte": 0.9014176554647615, "num_chars": 2}, {"sum_logits": -1.8592455387115479, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.8592455387115479, "logits_per_char": -0.9296227693557739, "bits_per_byte": 1.3411621592480032, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1054, "native_id": "Mercury_7014333", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5362582206726074, "logits_per_token_corr": -1.5362582206726074, "logits_per_char_corr": -0.7681291103363037, "bits_per_byte_corr": 1.108176058245433}, "model_output": [{"sum_logits": -1.5362582206726074, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.5362582206726074, "logits_per_char": -0.7681291103363037, "bits_per_byte": 1.108176058245433, "num_chars": 2}, {"sum_logits": -1.1330103874206543, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.1330103874206543, "logits_per_char": -0.5665051937103271, "bits_per_byte": 0.8172942336043177, "num_chars": 2}, {"sum_logits": -1.233750820159912, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.233750820159912, "logits_per_char": -0.616875410079956, "bits_per_byte": 0.8899630949693377, "num_chars": 2}, {"sum_logits": -1.8598742485046387, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.8598742485046387, "logits_per_char": -0.9299371242523193, "bits_per_byte": 1.3416156774983288, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1055, "native_id": "Mercury_SC_406623", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4263429641723633, "logits_per_token_corr": -1.4263429641723633, "logits_per_char_corr": -0.7131714820861816, "bits_per_byte_corr": 1.0288889605099056}, "model_output": [{"sum_logits": -1.4263429641723633, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4263429641723633, "logits_per_char": -0.7131714820861816, "bits_per_byte": 1.0288889605099056, "num_chars": 2}, {"sum_logits": -1.1094675064086914, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.1094675064086914, "logits_per_char": -0.5547337532043457, "bits_per_byte": 0.8003116347622067, "num_chars": 2}, {"sum_logits": -1.395157814025879, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.395157814025879, "logits_per_char": -0.6975789070129395, "bits_per_byte": 1.0063936297770342, "num_chars": 2}, {"sum_logits": -1.8169336318969727, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.8169336318969727, "logits_per_char": -0.9084668159484863, "bits_per_byte": 1.3106405701820094, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1056, "native_id": "Mercury_7042648", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4159095287322998, "logits_per_token_corr": -1.4159095287322998, "logits_per_char_corr": -0.7079547643661499, "bits_per_byte_corr": 1.021362827725493}, "model_output": [{"sum_logits": -1.4159095287322998, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4159095287322998, "logits_per_char": -0.7079547643661499, "bits_per_byte": 1.021362827725493, "num_chars": 2}, {"sum_logits": -1.3334243297576904, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3334243297576904, "logits_per_char": -0.6667121648788452, "bits_per_byte": 0.9618623339717459, "num_chars": 2}, {"sum_logits": -1.186354398727417, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.186354398727417, "logits_per_char": -0.5931771993637085, "bits_per_byte": 0.8557738038910411, "num_chars": 2}, {"sum_logits": -1.7390162944793701, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.7390162944793701, "logits_per_char": -0.8695081472396851, "bits_per_byte": 1.2544350920361456, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1057, "native_id": "MCAS_2004_8_23", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.438193678855896, "logits_per_token_corr": -1.438193678855896, "logits_per_char_corr": -0.719096839427948, "bits_per_byte_corr": 1.037437444162373}, "model_output": [{"sum_logits": -1.6561616659164429, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.6561616659164429, "logits_per_char": -0.8280808329582214, "bits_per_byte": 1.1946681111648865, "num_chars": 2}, {"sum_logits": -1.438193678855896, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.438193678855896, "logits_per_char": -0.719096839427948, "bits_per_byte": 1.037437444162373, "num_chars": 2}, {"sum_logits": -1.3798669576644897, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3798669576644897, "logits_per_char": -0.6899334788322449, "bits_per_byte": 0.9953636084552654, "num_chars": 2}, {"sum_logits": -1.3134301900863647, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.3134301900863647, "logits_per_char": -0.6567150950431824, "bits_per_byte": 0.9474396108964042, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1058, "native_id": "MCAS_2013_8_29425", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2603702545166016, "logits_per_token_corr": -1.2603702545166016, "logits_per_char_corr": -0.6301851272583008, "bits_per_byte_corr": 0.9091649579381841}, "model_output": [{"sum_logits": -1.2603702545166016, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.2603702545166016, "logits_per_char": -0.6301851272583008, "bits_per_byte": 0.9091649579381841, "num_chars": 2}, {"sum_logits": -1.0922660827636719, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.0922660827636719, "logits_per_char": -0.5461330413818359, "bits_per_byte": 0.7879034304677478, "num_chars": 2}, {"sum_logits": -1.4431114196777344, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4431114196777344, "logits_per_char": -0.7215557098388672, "bits_per_byte": 1.0409848443103973, "num_chars": 2}, {"sum_logits": -2.0298194885253906, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -2.0298194885253906, "logits_per_char": -1.0149097442626953, "bits_per_byte": 1.4642052549987286, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1059, "native_id": "MEAP_2005_5_15", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0239163637161255, "logits_per_token_corr": -1.0239163637161255, "logits_per_char_corr": -0.5119581818580627, "bits_per_byte_corr": 0.7385995301096878}, "model_output": [{"sum_logits": -1.5981637239456177, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.5981637239456177, "logits_per_char": -0.7990818619728088, "bits_per_byte": 1.1528314395333188, "num_chars": 2}, {"sum_logits": -1.0239163637161255, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -1.0239163637161255, "logits_per_char": -0.5119581818580627, "bits_per_byte": 0.7385995301096878, "num_chars": 2}, {"sum_logits": -1.2840138673782349, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.2840138673782349, "logits_per_char": -0.6420069336891174, "bits_per_byte": 0.9262202194502848, "num_chars": 2}, {"sum_logits": -1.9089661836624146, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.9089661836624146, "logits_per_char": -0.9544830918312073, "bits_per_byte": 1.377028023198237, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1060, "native_id": "Mercury_7016258", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.035788893699646, "logits_per_token_corr": -1.035788893699646, "logits_per_char_corr": -0.517894446849823, "bits_per_byte_corr": 0.7471637501747093}, "model_output": [{"sum_logits": -1.2791608572006226, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.2791608572006226, "logits_per_char": -0.6395804286003113, "bits_per_byte": 0.9227195125919697, "num_chars": 2}, {"sum_logits": -1.035788893699646, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.035788893699646, "logits_per_char": -0.517894446849823, "bits_per_byte": 0.7471637501747093, "num_chars": 2}, {"sum_logits": -1.6391748189926147, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.6391748189926147, "logits_per_char": -0.8195874094963074, "bits_per_byte": 1.1824146912562044, "num_chars": 2}, {"sum_logits": -1.8208078145980835, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.8208078145980835, "logits_per_char": -0.9104039072990417, "bits_per_byte": 1.3134352022672067, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1061, "native_id": "NCEOGA_2013_8_5", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5364125967025757, "logits_per_token_corr": -1.5364125967025757, "logits_per_char_corr": -0.7682062983512878, "bits_per_byte_corr": 1.1082874170118666}, "model_output": [{"sum_logits": -1.5364125967025757, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.5364125967025757, "logits_per_char": -0.7682062983512878, "bits_per_byte": 1.1082874170118666, "num_chars": 2}, {"sum_logits": -1.353659987449646, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.353659987449646, "logits_per_char": -0.676829993724823, "bits_per_byte": 0.9764592754724118, "num_chars": 2}, {"sum_logits": -1.3122342824935913, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.3122342824935913, "logits_per_char": -0.6561171412467957, "bits_per_byte": 0.9465769459196757, "num_chars": 2}, {"sum_logits": -1.4280351400375366, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4280351400375366, "logits_per_char": -0.7140175700187683, "bits_per_byte": 1.0301096073744054, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1062, "native_id": "Mercury_7015540", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1406759023666382, "logits_per_token_corr": -1.1406759023666382, "logits_per_char_corr": -0.5703379511833191, "bits_per_byte_corr": 0.8228237338035373}, "model_output": [{"sum_logits": -1.2755311727523804, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.2755311727523804, "logits_per_char": -0.6377655863761902, "bits_per_byte": 0.9201012487152325, "num_chars": 2}, {"sum_logits": -1.1406759023666382, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.1406759023666382, "logits_per_char": -0.5703379511833191, "bits_per_byte": 0.8228237338035373, "num_chars": 2}, {"sum_logits": -1.6171640157699585, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.6171640157699585, "logits_per_char": -0.8085820078849792, "bits_per_byte": 1.1665372529285383, "num_chars": 2}, {"sum_logits": -1.67013680934906, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.67013680934906, "logits_per_char": -0.83506840467453, "bits_per_byte": 1.204749046227868, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1063, "native_id": "Mercury_SC_414001", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.2497267723083496, "logits_per_token_corr": -2.2497267723083496, "logits_per_char_corr": -1.1248633861541748, "bits_per_byte_corr": 1.622834828883361}, "model_output": [{"sum_logits": -1.3629318475723267, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.3629318475723267, "logits_per_char": -0.6814659237861633, "bits_per_byte": 0.9831475087818204, "num_chars": 2}, {"sum_logits": -0.9308677911758423, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": true, "logits_per_token": -0.9308677911758423, "logits_per_char": -0.46543389558792114, "bits_per_byte": 0.6714791730268078, "num_chars": 2}, {"sum_logits": -1.4406486749649048, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.4406486749649048, "logits_per_char": -0.7203243374824524, "bits_per_byte": 1.0392083495183086, "num_chars": 2}, {"sum_logits": -2.2497267723083496, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -2.2497267723083496, "logits_per_char": -1.1248633861541748, "bits_per_byte": 1.622834828883361, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1064, "native_id": "Mercury_7017973", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8638015985488892, "logits_per_token_corr": -1.8638015985488892, "logits_per_char_corr": -0.9319007992744446, "bits_per_byte_corr": 1.3444486617146685}, "model_output": [{"sum_logits": -1.2809253931045532, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.2809253931045532, "logits_per_char": -0.6404626965522766, "bits_per_byte": 0.9239923561910063, "num_chars": 2}, {"sum_logits": -1.1198745965957642, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.1198745965957642, "logits_per_char": -0.5599372982978821, "bits_per_byte": 0.8078187634636991, "num_chars": 2}, {"sum_logits": -1.4772506952285767, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4772506952285767, "logits_per_char": -0.7386253476142883, "bits_per_byte": 1.0656111260787862, "num_chars": 2}, {"sum_logits": -1.8638015985488892, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.8638015985488892, "logits_per_char": -0.9319007992744446, "bits_per_byte": 1.3444486617146685, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1065, "native_id": "Mercury_407097", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3414674997329712, "logits_per_token_corr": -1.3414674997329712, "logits_per_char_corr": -0.6707337498664856, "bits_per_byte_corr": 0.9676642546899324}, "model_output": [{"sum_logits": -1.417030930519104, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -1.417030930519104, "logits_per_char": -0.708515465259552, "bits_per_byte": 1.0221717481238266, "num_chars": 2}, {"sum_logits": -1.0676981210708618, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": true, "logits_per_token": -1.0676981210708618, "logits_per_char": -0.5338490605354309, "bits_per_byte": 0.7701813922182515, "num_chars": 2}, {"sum_logits": -1.3414674997329712, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -1.3414674997329712, "logits_per_char": -0.6707337498664856, "bits_per_byte": 0.9676642546899324, "num_chars": 2}, {"sum_logits": -1.9936243295669556, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -1.9936243295669556, "logits_per_char": -0.9968121647834778, "bits_per_byte": 1.4380959668319488, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1066, "native_id": "Mercury_SC_406794", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.501815915107727, "logits_per_token_corr": -1.501815915107727, "logits_per_char_corr": -0.7509079575538635, "bits_per_byte_corr": 1.0833311865277975}, "model_output": [{"sum_logits": -1.2933121919631958, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.2933121919631958, "logits_per_char": -0.6466560959815979, "bits_per_byte": 0.9329275428339391, "num_chars": 2}, {"sum_logits": -0.9684392213821411, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -0.9684392213821411, "logits_per_char": -0.48421961069107056, "bits_per_byte": 0.6985812310456939, "num_chars": 2}, {"sum_logits": -1.501815915107727, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.501815915107727, "logits_per_char": -0.7509079575538635, "bits_per_byte": 1.0833311865277975, "num_chars": 2}, {"sum_logits": -2.1750545501708984, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -2.1750545501708984, "logits_per_char": -1.0875272750854492, "bits_per_byte": 1.5689702065983924, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1067, "native_id": "Mercury_7227710", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.0096988677978516, "logits_per_token_corr": -2.0096988677978516, "logits_per_char_corr": -1.0048494338989258, "bits_per_byte_corr": 1.449691295127104}, "model_output": [{"sum_logits": -1.3838826417922974, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3838826417922974, "logits_per_char": -0.6919413208961487, "bits_per_byte": 0.9982603122437498, "num_chars": 2}, {"sum_logits": -0.9876595735549927, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -0.9876595735549927, "logits_per_char": -0.49382978677749634, "bits_per_byte": 0.71244578442766, "num_chars": 2}, {"sum_logits": -1.5858365297317505, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5858365297317505, "logits_per_char": -0.7929182648658752, "bits_per_byte": 1.1439392485531017, "num_chars": 2}, {"sum_logits": -2.0096988677978516, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -2.0096988677978516, "logits_per_char": -1.0048494338989258, "bits_per_byte": 1.449691295127104, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1068, "native_id": "Mercury_SC_406710", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3472681045532227, "logits_per_token_corr": -1.3472681045532227, "logits_per_char_corr": -0.6736340522766113, "bits_per_byte_corr": 0.9718485065941022}, "model_output": [{"sum_logits": -1.1598405838012695, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.1598405838012695, "logits_per_char": -0.5799202919006348, "bits_per_byte": 0.836648129236527, "num_chars": 2}, {"sum_logits": -1.279465675354004, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.279465675354004, "logits_per_char": -0.639732837677002, "bits_per_byte": 0.922939392411098, "num_chars": 2}, {"sum_logits": -1.3472681045532227, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3472681045532227, "logits_per_char": -0.6736340522766113, "bits_per_byte": 0.9718485065941022, "num_chars": 2}, {"sum_logits": -1.988316535949707, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.988316535949707, "logits_per_char": -0.9941582679748535, "bits_per_byte": 1.4342672030671129, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1069, "native_id": "Mercury_401926", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5006893873214722, "logits_per_token_corr": -1.5006893873214722, "logits_per_char_corr": -0.7503446936607361, "bits_per_byte_corr": 1.08251856850247}, "model_output": [{"sum_logits": -1.9157074689865112, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -1.9157074689865112, "logits_per_char": -0.9578537344932556, "bits_per_byte": 1.3818908326513866, "num_chars": 2}, {"sum_logits": -1.1821030378341675, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -1.1821030378341675, "logits_per_char": -0.5910515189170837, "bits_per_byte": 0.8527070952521787, "num_chars": 2}, {"sum_logits": -1.1744126081466675, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": true, "logits_per_token": -1.1744126081466675, "logits_per_char": -0.5872063040733337, "bits_per_byte": 0.8471596228659439, "num_chars": 2}, {"sum_logits": -1.5006893873214722, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -1.5006893873214722, "logits_per_char": -0.7503446936607361, "bits_per_byte": 1.08251856850247, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1070, "native_id": "MCAS_2014_5_15", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.2154946327209473, "logits_per_token_corr": -2.2154946327209473, "logits_per_char_corr": -1.1077473163604736, "bits_per_byte_corr": 1.598141559872461}, "model_output": [{"sum_logits": -1.2390943765640259, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.2390943765640259, "logits_per_char": -0.6195471882820129, "bits_per_byte": 0.8938176561318031, "num_chars": 2}, {"sum_logits": -1.0848852396011353, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": true, "logits_per_token": -1.0848852396011353, "logits_per_char": -0.5424426198005676, "bits_per_byte": 0.7825792775536585, "num_chars": 2}, {"sum_logits": -1.3709608316421509, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.3709608316421509, "logits_per_char": -0.6854804158210754, "bits_per_byte": 0.9889391965322806, "num_chars": 2}, {"sum_logits": -2.2154946327209473, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -2.2154946327209473, "logits_per_char": -1.1077473163604736, "bits_per_byte": 1.598141559872461, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1071, "native_id": "Mercury_LBS10151", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.296649694442749, "logits_per_token_corr": -1.296649694442749, "logits_per_char_corr": -0.6483248472213745, "bits_per_byte_corr": 0.9353350419720439}, "model_output": [{"sum_logits": -1.296649694442749, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.296649694442749, "logits_per_char": -0.6483248472213745, "bits_per_byte": 0.9353350419720439, "num_chars": 2}, {"sum_logits": -1.147968053817749, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.147968053817749, "logits_per_char": -0.5739840269088745, "bits_per_byte": 0.8280839091715055, "num_chars": 2}, {"sum_logits": -1.5457837581634521, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.5457837581634521, "logits_per_char": -0.7728918790817261, "bits_per_byte": 1.1150472810953596, "num_chars": 2}, {"sum_logits": -1.7468621730804443, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.7468621730804443, "logits_per_char": -0.8734310865402222, "bits_per_byte": 1.260094697110743, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1072, "native_id": "ACTAAP_2013_5_8", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4008314609527588, "logits_per_token_corr": -1.4008314609527588, "logits_per_char_corr": -0.7004157304763794, "bits_per_byte_corr": 1.0104863009196192}, "model_output": [{"sum_logits": -1.3680574893951416, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.3680574893951416, "logits_per_char": -0.6840287446975708, "bits_per_byte": 0.9868448778013972, "num_chars": 2}, {"sum_logits": -1.4008314609527588, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4008314609527588, "logits_per_char": -0.7004157304763794, "bits_per_byte": 1.0104863009196192, "num_chars": 2}, {"sum_logits": -1.3306620121002197, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.3306620121002197, "logits_per_char": -0.6653310060501099, "bits_per_byte": 0.959869742978848, "num_chars": 2}, {"sum_logits": -1.5205786228179932, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.5205786228179932, "logits_per_char": -0.7602893114089966, "bits_per_byte": 1.096865619211432, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1073, "native_id": "Mercury_SC_407592", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4110238552093506, "logits_per_token_corr": -1.4110238552093506, "logits_per_char_corr": -0.7055119276046753, "bits_per_byte_corr": 1.0178385592440096}, "model_output": [{"sum_logits": -1.4110238552093506, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4110238552093506, "logits_per_char": -0.7055119276046753, "bits_per_byte": 1.0178385592440096, "num_chars": 2}, {"sum_logits": -1.3257596492767334, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.3257596492767334, "logits_per_char": -0.6628798246383667, "bits_per_byte": 0.9563334357118044, "num_chars": 2}, {"sum_logits": -1.4601590633392334, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4601590633392334, "logits_per_char": -0.7300795316696167, "bits_per_byte": 1.0532821197950497, "num_chars": 2}, {"sum_logits": -1.4500997066497803, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4500997066497803, "logits_per_char": -0.7250498533248901, "bits_per_byte": 1.0460258277898409, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1074, "native_id": "TIMSS_1995_8_L6", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.1223039627075195, "logits_per_token_corr": -2.1223039627075195, "logits_per_char_corr": -1.0611519813537598, "bits_per_byte_corr": 1.5309187011296668}, "model_output": [{"sum_logits": -1.5564165115356445, "num_tokens": 1, "num_tokens_all": 439, "is_greedy": false, "logits_per_token": -1.5564165115356445, "logits_per_char": -0.7782082557678223, "bits_per_byte": 1.1227171913758938, "num_chars": 2}, {"sum_logits": -1.0157403945922852, "num_tokens": 1, "num_tokens_all": 439, "is_greedy": true, "logits_per_token": -1.0157403945922852, "logits_per_char": -0.5078701972961426, "bits_per_byte": 0.7327018150549708, "num_chars": 2}, {"sum_logits": -1.2041864395141602, "num_tokens": 1, "num_tokens_all": 439, "is_greedy": false, "logits_per_token": -1.2041864395141602, "logits_per_char": -0.6020932197570801, "bits_per_byte": 0.8686369022970324, "num_chars": 2}, {"sum_logits": -2.1223039627075195, "num_tokens": 1, "num_tokens_all": 439, "is_greedy": false, "logits_per_token": -2.1223039627075195, "logits_per_char": -1.0611519813537598, "bits_per_byte": 1.5309187011296668, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1075, "native_id": "Mercury_7233398", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2997599840164185, "logits_per_token_corr": -1.2997599840164185, "logits_per_char_corr": -0.6498799920082092, "bits_per_byte_corr": 0.9375786416438762}, "model_output": [{"sum_logits": -1.5367175340652466, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.5367175340652466, "logits_per_char": -0.7683587670326233, "bits_per_byte": 1.1085073828223204, "num_chars": 2}, {"sum_logits": -1.0902796983718872, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.0902796983718872, "logits_per_char": -0.5451398491859436, "bits_per_byte": 0.7864705570120832, "num_chars": 2}, {"sum_logits": -1.2997599840164185, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.2997599840164185, "logits_per_char": -0.6498799920082092, "bits_per_byte": 0.9375786416438762, "num_chars": 2}, {"sum_logits": -1.809968113899231, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.809968113899231, "logits_per_char": -0.9049840569496155, "bits_per_byte": 1.3056160110457236, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1076, "native_id": "Mercury_407664", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5130258798599243, "logits_per_token_corr": -1.5130258798599243, "logits_per_char_corr": -0.7565129399299622, "bits_per_byte_corr": 1.0914174668060708}, "model_output": [{"sum_logits": -1.571327805519104, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.571327805519104, "logits_per_char": -0.785663902759552, "bits_per_byte": 1.1334734163174889, "num_chars": 2}, {"sum_logits": -1.2025896310806274, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2025896310806274, "logits_per_char": -0.6012948155403137, "bits_per_byte": 0.8674850484928779, "num_chars": 2}, {"sum_logits": -1.3629995584487915, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3629995584487915, "logits_per_char": -0.6814997792243958, "bits_per_byte": 0.9831963518546655, "num_chars": 2}, {"sum_logits": -1.5130258798599243, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5130258798599243, "logits_per_char": -0.7565129399299622, "bits_per_byte": 1.0914174668060708, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1077, "native_id": "Mercury_SC_408657", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.253080129623413, "logits_per_token_corr": -2.253080129623413, "logits_per_char_corr": -1.1265400648117065, "bits_per_byte_corr": 1.625253764867748}, "model_output": [{"sum_logits": -1.352172613143921, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.352172613143921, "logits_per_char": -0.6760863065719604, "bits_per_byte": 0.9753863617050035, "num_chars": 2}, {"sum_logits": -0.9880940914154053, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -0.9880940914154053, "logits_per_char": -0.49404704570770264, "bits_per_byte": 0.7127592228088577, "num_chars": 2}, {"sum_logits": -1.364623785018921, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.364623785018921, "logits_per_char": -0.6823118925094604, "bits_per_byte": 0.9843679836636693, "num_chars": 2}, {"sum_logits": -2.253080129623413, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -2.253080129623413, "logits_per_char": -1.1265400648117065, "bits_per_byte": 1.625253764867748, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1078, "native_id": "Mercury_7142800", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1817700862884521, "logits_per_token_corr": -1.1817700862884521, "logits_per_char_corr": -0.5908850431442261, "bits_per_byte_corr": 0.8524669214802486}, "model_output": [{"sum_logits": -1.423633337020874, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.423633337020874, "logits_per_char": -0.711816668510437, "bits_per_byte": 1.0269343776828483, "num_chars": 2}, {"sum_logits": -1.1817700862884521, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.1817700862884521, "logits_per_char": -0.5908850431442261, "bits_per_byte": 0.8524669214802486, "num_chars": 2}, {"sum_logits": -1.29007887840271, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.29007887840271, "logits_per_char": -0.645039439201355, "bits_per_byte": 0.9305952001142614, "num_chars": 2}, {"sum_logits": -1.808307409286499, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.808307409286499, "logits_per_char": -0.9041537046432495, "bits_per_byte": 1.3044180658911377, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1079, "native_id": "Mercury_SC_410837", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.9429119229316711, "logits_per_token_corr": -0.9429119229316711, "logits_per_char_corr": -0.47145596146583557, "bits_per_byte_corr": 0.6801671776047878}, "model_output": [{"sum_logits": -1.1905028820037842, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.1905028820037842, "logits_per_char": -0.5952514410018921, "bits_per_byte": 0.8587663020160561, "num_chars": 2}, {"sum_logits": -0.9429119229316711, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": true, "logits_per_token": -0.9429119229316711, "logits_per_char": -0.47145596146583557, "bits_per_byte": 0.6801671776047878, "num_chars": 2}, {"sum_logits": -1.6207396984100342, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.6207396984100342, "logits_per_char": -0.8103698492050171, "bits_per_byte": 1.1691165627348552, "num_chars": 2}, {"sum_logits": -2.2954070568084717, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -2.2954070568084717, "logits_per_char": -1.1477035284042358, "bits_per_byte": 1.6557861888407461, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1080, "native_id": "Mercury_7154315", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4245984554290771, "logits_per_token_corr": -1.4245984554290771, "logits_per_char_corr": -0.7122992277145386, "bits_per_byte_corr": 1.0276305634535416}, "model_output": [{"sum_logits": -1.4245984554290771, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.4245984554290771, "logits_per_char": -0.7122992277145386, "bits_per_byte": 1.0276305634535416, "num_chars": 2}, {"sum_logits": -1.130054235458374, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.130054235458374, "logits_per_char": -0.565027117729187, "bits_per_byte": 0.8151618207162682, "num_chars": 2}, {"sum_logits": -1.3230626583099365, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.3230626583099365, "logits_per_char": -0.6615313291549683, "bits_per_byte": 0.954387967965243, "num_chars": 2}, {"sum_logits": -1.833768606185913, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.833768606185913, "logits_per_char": -0.9168843030929565, "bits_per_byte": 1.322784437142092, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1081, "native_id": "Mercury_7239628", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.668829083442688, "logits_per_token_corr": -1.668829083442688, "logits_per_char_corr": -0.834414541721344, "bits_per_byte_corr": 1.203805721387885}, "model_output": [{"sum_logits": -1.3509141206741333, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3509141206741333, "logits_per_char": -0.6754570603370667, "bits_per_byte": 0.9744785512824234, "num_chars": 2}, {"sum_logits": -1.2200661897659302, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2200661897659302, "logits_per_char": -0.6100330948829651, "bits_per_byte": 0.8800917207664325, "num_chars": 2}, {"sum_logits": -1.4075337648391724, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4075337648391724, "logits_per_char": -0.7037668824195862, "bits_per_byte": 1.0153209912093526, "num_chars": 2}, {"sum_logits": -1.668829083442688, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.668829083442688, "logits_per_char": -0.834414541721344, "bits_per_byte": 1.203805721387885, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1082, "native_id": "Mercury_401241", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3247225284576416, "logits_per_token_corr": -1.3247225284576416, "logits_per_char_corr": -0.6623612642288208, "bits_per_byte_corr": 0.9555853111805507}, "model_output": [{"sum_logits": -1.3247225284576416, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3247225284576416, "logits_per_char": -0.6623612642288208, "bits_per_byte": 0.9555853111805507, "num_chars": 2}, {"sum_logits": -1.2359049320220947, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.2359049320220947, "logits_per_char": -0.6179524660110474, "bits_per_byte": 0.8915169582198842, "num_chars": 2}, {"sum_logits": -1.3455488681793213, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3455488681793213, "logits_per_char": -0.6727744340896606, "bits_per_byte": 0.9706083396987295, "num_chars": 2}, {"sum_logits": -1.776916742324829, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.776916742324829, "logits_per_char": -0.8884583711624146, "bits_per_byte": 1.2817744861132223, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1083, "native_id": "Mercury_SC_408251", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2269898653030396, "logits_per_token_corr": -1.2269898653030396, "logits_per_char_corr": -0.6134949326515198, "bits_per_byte_corr": 0.8850860969474921}, "model_output": [{"sum_logits": -1.2269898653030396, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.2269898653030396, "logits_per_char": -0.6134949326515198, "bits_per_byte": 0.8850860969474921, "num_chars": 2}, {"sum_logits": -1.1649857759475708, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.1649857759475708, "logits_per_char": -0.5824928879737854, "bits_per_byte": 0.8403596008334745, "num_chars": 2}, {"sum_logits": -1.486312985420227, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.486312985420227, "logits_per_char": -0.7431564927101135, "bits_per_byte": 1.0721481866380862, "num_chars": 2}, {"sum_logits": -1.877151608467102, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.877151608467102, "logits_per_char": -0.938575804233551, "bits_per_byte": 1.3540786582670874, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1084, "native_id": "Mercury_7175893", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9610449075698853, "logits_per_token_corr": -1.9610449075698853, "logits_per_char_corr": -0.9805224537849426, "bits_per_byte_corr": 1.4145948815568308}, "model_output": [{"sum_logits": -1.3491045236587524, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3491045236587524, "logits_per_char": -0.6745522618293762, "bits_per_byte": 0.9731732029623739, "num_chars": 2}, {"sum_logits": -1.0877176523208618, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.0877176523208618, "logits_per_char": -0.5438588261604309, "bits_per_byte": 0.7846224314459103, "num_chars": 2}, {"sum_logits": -1.426827073097229, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.426827073097229, "logits_per_char": -0.7134135365486145, "bits_per_byte": 1.0292381712824827, "num_chars": 2}, {"sum_logits": -1.9610449075698853, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.9610449075698853, "logits_per_char": -0.9805224537849426, "bits_per_byte": 1.4145948815568308, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1085, "native_id": "Mercury_7202843", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3662965297698975, "logits_per_token_corr": -1.3662965297698975, "logits_per_char_corr": -0.6831482648849487, "bits_per_byte_corr": 0.9855746139421235}, "model_output": [{"sum_logits": -1.3662965297698975, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.3662965297698975, "logits_per_char": -0.6831482648849487, "bits_per_byte": 0.9855746139421235, "num_chars": 2}, {"sum_logits": -1.0414845943450928, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": true, "logits_per_token": -1.0414845943450928, "logits_per_char": -0.5207422971725464, "bits_per_byte": 0.7512723297124992, "num_chars": 2}, {"sum_logits": -1.5631558895111084, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.5631558895111084, "logits_per_char": -0.7815779447555542, "bits_per_byte": 1.1275786249678363, "num_chars": 2}, {"sum_logits": -1.800962209701538, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.800962209701538, "logits_per_char": -0.900481104850769, "bits_per_byte": 1.2991196243833525, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1086, "native_id": "Mercury_7159023", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4163084030151367, "logits_per_token_corr": -1.4163084030151367, "logits_per_char_corr": -0.7081542015075684, "bits_per_byte_corr": 1.0216505547003865}, "model_output": [{"sum_logits": -1.4870519638061523, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": false, "logits_per_token": -1.4870519638061523, "logits_per_char": -0.7435259819030762, "bits_per_byte": 1.0726812468644358, "num_chars": 2}, {"sum_logits": -1.0175180435180664, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": true, "logits_per_token": -1.0175180435180664, "logits_per_char": -0.5087590217590332, "bits_per_byte": 0.7339841176998048, "num_chars": 2}, {"sum_logits": -1.4163084030151367, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": false, "logits_per_token": -1.4163084030151367, "logits_per_char": -0.7081542015075684, "bits_per_byte": 1.0216505547003865, "num_chars": 2}, {"sum_logits": -1.8422346115112305, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": false, "logits_per_token": -1.8422346115112305, "logits_per_char": -0.9211173057556152, "bits_per_byte": 1.3288913690915838, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1087, "native_id": "MDSA_2008_8_3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7448498010635376, "logits_per_token_corr": -1.7448498010635376, "logits_per_char_corr": -0.8724249005317688, "bits_per_byte_corr": 1.2586430775461344}, "model_output": [{"sum_logits": -1.3307758569717407, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.3307758569717407, "logits_per_char": -0.6653879284858704, "bits_per_byte": 0.9599518646946351, "num_chars": 2}, {"sum_logits": -1.218894600868225, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": true, "logits_per_token": -1.218894600868225, "logits_per_char": -0.6094473004341125, "bits_per_byte": 0.8792465980200921, "num_chars": 2}, {"sum_logits": -1.3739601373672485, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.3739601373672485, "logits_per_char": -0.6869800686836243, "bits_per_byte": 0.9911027382801363, "num_chars": 2}, {"sum_logits": -1.7448498010635376, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.7448498010635376, "logits_per_char": -0.8724249005317688, "bits_per_byte": 1.2586430775461344, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1088, "native_id": "Mercury_7218348", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4210044145584106, "logits_per_token_corr": -1.4210044145584106, "logits_per_char_corr": -0.7105022072792053, "bits_per_byte_corr": 1.0250380109831083}, "model_output": [{"sum_logits": -1.4210044145584106, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4210044145584106, "logits_per_char": -0.7105022072792053, "bits_per_byte": 1.0250380109831083, "num_chars": 2}, {"sum_logits": -1.2049304246902466, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2049304246902466, "logits_per_char": -0.6024652123451233, "bits_per_byte": 0.8691735741590502, "num_chars": 2}, {"sum_logits": -1.403767704963684, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.403767704963684, "logits_per_char": -0.701883852481842, "bits_per_byte": 1.0126043532563218, "num_chars": 2}, {"sum_logits": -1.6380282640457153, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.6380282640457153, "logits_per_char": -0.8190141320228577, "bits_per_byte": 1.1815876266882044, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1089, "native_id": "Mercury_SC_406458", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7567569017410278, "logits_per_token_corr": -1.7567569017410278, "logits_per_char_corr": -0.8783784508705139, "bits_per_byte_corr": 1.2672322350955307}, "model_output": [{"sum_logits": -1.3976527452468872, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.3976527452468872, "logits_per_char": -0.6988263726234436, "bits_per_byte": 1.0081933422269893, "num_chars": 2}, {"sum_logits": -1.2264491319656372, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.2264491319656372, "logits_per_char": -0.6132245659828186, "bits_per_byte": 0.8846960402953349, "num_chars": 2}, {"sum_logits": -1.3033305406570435, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.3033305406570435, "logits_per_char": -0.6516652703285217, "bits_per_byte": 0.9401542538231995, "num_chars": 2}, {"sum_logits": -1.7567569017410278, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.7567569017410278, "logits_per_char": -0.8783784508705139, "bits_per_byte": 1.2672322350955307, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1090, "native_id": "LEAP_2007_4_10280", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4298701286315918, "logits_per_token_corr": -1.4298701286315918, "logits_per_char_corr": -0.7149350643157959, "bits_per_byte_corr": 1.0314332718467718}, "model_output": [{"sum_logits": -1.4298701286315918, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4298701286315918, "logits_per_char": -0.7149350643157959, "bits_per_byte": 1.0314332718467718, "num_chars": 2}, {"sum_logits": -1.1604094505310059, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.1604094505310059, "logits_per_char": -0.5802047252655029, "bits_per_byte": 0.8370584798414858, "num_chars": 2}, {"sum_logits": -1.2693877220153809, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.2693877220153809, "logits_per_char": -0.6346938610076904, "bits_per_byte": 0.9156696857591219, "num_chars": 2}, {"sum_logits": -1.883988857269287, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.883988857269287, "logits_per_char": -0.9419944286346436, "bits_per_byte": 1.3590106907372093, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1091, "native_id": "Mercury_7216965", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.149661660194397, "logits_per_token_corr": -1.149661660194397, "logits_per_char_corr": -0.5748308300971985, "bits_per_byte_corr": 0.8293055879319103}, "model_output": [{"sum_logits": -1.5200859308242798, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.5200859308242798, "logits_per_char": -0.7600429654121399, "bits_per_byte": 1.096510217063424, "num_chars": 2}, {"sum_logits": -1.4790245294570923, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4790245294570923, "logits_per_char": -0.7395122647285461, "bits_per_byte": 1.0668906770012063, "num_chars": 2}, {"sum_logits": -1.149661660194397, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.149661660194397, "logits_per_char": -0.5748308300971985, "bits_per_byte": 0.8293055879319103, "num_chars": 2}, {"sum_logits": -1.5040050745010376, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.5040050745010376, "logits_per_char": -0.7520025372505188, "bits_per_byte": 1.0849103312280208, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1092, "native_id": "NYSEDREGENTS_2010_8_42", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3903145790100098, "logits_per_token_corr": -1.3903145790100098, "logits_per_char_corr": -0.6951572895050049, "bits_per_byte_corr": 1.0028999742074045}, "model_output": [{"sum_logits": -1.4951272010803223, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.4951272010803223, "logits_per_char": -0.7475636005401611, "bits_per_byte": 1.0785062992491634, "num_chars": 2}, {"sum_logits": -1.3903145790100098, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.3903145790100098, "logits_per_char": -0.6951572895050049, "bits_per_byte": 1.0028999742074045, "num_chars": 2}, {"sum_logits": -1.1140618324279785, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": true, "logits_per_token": -1.1140618324279785, "logits_per_char": -0.5570309162139893, "bits_per_byte": 0.8036257404443353, "num_chars": 2}, {"sum_logits": -1.7440094947814941, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.7440094947814941, "logits_per_char": -0.8720047473907471, "bits_per_byte": 1.258036924693168, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1093, "native_id": "LEAP__7_10351", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3608496189117432, "logits_per_token_corr": -1.3608496189117432, "logits_per_char_corr": -0.6804248094558716, "bits_per_byte_corr": 0.9816454983005091}, "model_output": [{"sum_logits": -1.5229628086090088, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.5229628086090088, "logits_per_char": -0.7614814043045044, "bits_per_byte": 1.0985854457200614, "num_chars": 2}, {"sum_logits": -0.9464170932769775, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": true, "logits_per_token": -0.9464170932769775, "logits_per_char": -0.47320854663848877, "bits_per_byte": 0.682695623542112, "num_chars": 2}, {"sum_logits": -1.3608496189117432, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.3608496189117432, "logits_per_char": -0.6804248094558716, "bits_per_byte": 0.9816454983005091, "num_chars": 2}, {"sum_logits": -2.0456717014312744, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -2.0456717014312744, "logits_per_char": -1.0228358507156372, "bits_per_byte": 1.475640209471954, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1094, "native_id": "Mercury_SC_400590", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.082221031188965, "logits_per_token_corr": -2.082221031188965, "logits_per_char_corr": -1.0411105155944824, "bits_per_byte_corr": 1.5020049778665907}, "model_output": [{"sum_logits": -1.1333932876586914, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.1333932876586914, "logits_per_char": -0.5666966438293457, "bits_per_byte": 0.8175704377416037, "num_chars": 2}, {"sum_logits": -1.1685075759887695, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.1685075759887695, "logits_per_char": -0.5842537879943848, "bits_per_byte": 0.8429000425606963, "num_chars": 2}, {"sum_logits": -1.476771354675293, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.476771354675293, "logits_per_char": -0.7383856773376465, "bits_per_byte": 1.0652653549592261, "num_chars": 2}, {"sum_logits": -2.082221031188965, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -2.082221031188965, "logits_per_char": -1.0411105155944824, "bits_per_byte": 1.5020049778665907, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1095, "native_id": "Mercury_7086608", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0680922269821167, "logits_per_token_corr": -1.0680922269821167, "logits_per_char_corr": -0.5340461134910583, "bits_per_byte_corr": 0.7704656795401279}, "model_output": [{"sum_logits": -1.1845186948776245, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.1845186948776245, "logits_per_char": -0.5922593474388123, "bits_per_byte": 0.854449623470722, "num_chars": 2}, {"sum_logits": -1.0680922269821167, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.0680922269821167, "logits_per_char": -0.5340461134910583, "bits_per_byte": 0.7704656795401279, "num_chars": 2}, {"sum_logits": -1.6103705167770386, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.6103705167770386, "logits_per_char": -0.8051852583885193, "bits_per_byte": 1.1616367792748499, "num_chars": 2}, {"sum_logits": -2.0168895721435547, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -2.0168895721435547, "logits_per_char": -1.0084447860717773, "bits_per_byte": 1.45487829187713, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1096, "native_id": "Mercury_7187863", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5399531126022339, "logits_per_token_corr": -1.5399531126022339, "logits_per_char_corr": -0.7699765563011169, "bits_per_byte_corr": 1.1108413593771813}, "model_output": [{"sum_logits": -1.3922804594039917, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3922804594039917, "logits_per_char": -0.6961402297019958, "bits_per_byte": 1.004318057155095, "num_chars": 2}, {"sum_logits": -1.3235167264938354, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.3235167264938354, "logits_per_char": -0.6617583632469177, "bits_per_byte": 0.9547155089238114, "num_chars": 2}, {"sum_logits": -1.3477476835250854, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3477476835250854, "logits_per_char": -0.6738738417625427, "bits_per_byte": 0.9721944496963129, "num_chars": 2}, {"sum_logits": -1.5399531126022339, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.5399531126022339, "logits_per_char": -0.7699765563011169, "bits_per_byte": 1.1108413593771813, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1097, "native_id": "Mercury_7120873", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3812260627746582, "logits_per_token_corr": -1.3812260627746582, "logits_per_char_corr": -0.6906130313873291, "bits_per_byte_corr": 0.9963439955565095}, "model_output": [{"sum_logits": -1.3346943855285645, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3346943855285645, "logits_per_char": -0.6673471927642822, "bits_per_byte": 0.9627784855528928, "num_chars": 2}, {"sum_logits": -1.05759859085083, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.05759859085083, "logits_per_char": -0.528799295425415, "bits_per_byte": 0.7628961211363723, "num_chars": 2}, {"sum_logits": -1.3812260627746582, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3812260627746582, "logits_per_char": -0.6906130313873291, "bits_per_byte": 0.9963439955565095, "num_chars": 2}, {"sum_logits": -2.041616916656494, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -2.041616916656494, "logits_per_char": -1.020808458328247, "bits_per_byte": 1.4727153005287281, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1098, "native_id": "Mercury_184730", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1328120231628418, "logits_per_token_corr": -1.1328120231628418, "logits_per_char_corr": -0.5664060115814209, "bits_per_byte_corr": 0.8171511440387997}, "model_output": [{"sum_logits": -1.3706908226013184, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.3706908226013184, "logits_per_char": -0.6853454113006592, "bits_per_byte": 0.9887444261801783, "num_chars": 2}, {"sum_logits": -1.1328120231628418, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": true, "logits_per_token": -1.1328120231628418, "logits_per_char": -0.5664060115814209, "bits_per_byte": 0.8171511440387997, "num_chars": 2}, {"sum_logits": -1.2784361839294434, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.2784361839294434, "logits_per_char": -0.6392180919647217, "bits_per_byte": 0.9221967713246718, "num_chars": 2}, {"sum_logits": -2.012397289276123, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -2.012397289276123, "logits_per_char": -1.0061986446380615, "bits_per_byte": 1.4516377947695707, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1099, "native_id": "Mercury_SC_401265", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.1340010166168213, "logits_per_token_corr": -2.1340010166168213, "logits_per_char_corr": -1.0670005083084106, "bits_per_byte_corr": 1.5393563419636531}, "model_output": [{"sum_logits": -1.1926538944244385, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.1926538944244385, "logits_per_char": -0.5963269472122192, "bits_per_byte": 0.8603179294921415, "num_chars": 2}, {"sum_logits": -1.1531717777252197, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.1531717777252197, "logits_per_char": -0.5765858888626099, "bits_per_byte": 0.8318376025092399, "num_chars": 2}, {"sum_logits": -1.3704588413238525, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3704588413238525, "logits_per_char": -0.6852294206619263, "bits_per_byte": 0.9885770870608886, "num_chars": 2}, {"sum_logits": -2.1340010166168213, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -2.1340010166168213, "logits_per_char": -1.0670005083084106, "bits_per_byte": 1.5393563419636531, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1100, "native_id": "OHAT_2009_8_34", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.9242181777954102, "logits_per_token_corr": -1.9242181777954102, "logits_per_char_corr": -0.9621090888977051, "bits_per_byte_corr": 1.3880300113479151}, "model_output": [{"sum_logits": -1.245772361755371, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.245772361755371, "logits_per_char": -0.6228861808776855, "bits_per_byte": 0.8986348041911484, "num_chars": 2}, {"sum_logits": -1.1437616348266602, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.1437616348266602, "logits_per_char": -0.5718808174133301, "bits_per_byte": 0.8250496192623308, "num_chars": 2}, {"sum_logits": -1.4373960494995117, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4373960494995117, "logits_per_char": -0.7186980247497559, "bits_per_byte": 1.0368620762039111, "num_chars": 2}, {"sum_logits": -1.9242181777954102, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.9242181777954102, "logits_per_char": -0.9621090888977051, "bits_per_byte": 1.3880300113479151, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1101, "native_id": "Mercury_406639", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1143864393234253, "logits_per_token_corr": -1.1143864393234253, "logits_per_char_corr": -0.5571932196617126, "bits_per_byte_corr": 0.8038598948234852}, "model_output": [{"sum_logits": -1.497141718864441, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.497141718864441, "logits_per_char": -0.7485708594322205, "bits_per_byte": 1.0799594666576298, "num_chars": 2}, {"sum_logits": -1.1143864393234253, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.1143864393234253, "logits_per_char": -0.5571932196617126, "bits_per_byte": 0.8038598948234852, "num_chars": 2}, {"sum_logits": -1.3092602491378784, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3092602491378784, "logits_per_char": -0.6546301245689392, "bits_per_byte": 0.9444316343328115, "num_chars": 2}, {"sum_logits": -1.7749031782150269, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.7749031782150269, "logits_per_char": -0.8874515891075134, "bits_per_byte": 1.2803220066353596, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1102, "native_id": "Mercury_7008610", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6690846681594849, "logits_per_token_corr": -1.6690846681594849, "logits_per_char_corr": -0.8345423340797424, "bits_per_byte_corr": 1.20399008678961}, "model_output": [{"sum_logits": -1.2735158205032349, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.2735158205032349, "logits_per_char": -0.6367579102516174, "bits_per_byte": 0.9186474793674881, "num_chars": 2}, {"sum_logits": -1.2379056215286255, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.2379056215286255, "logits_per_char": -0.6189528107643127, "bits_per_byte": 0.8929601506346005, "num_chars": 2}, {"sum_logits": -1.4947720766067505, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4947720766067505, "logits_per_char": -0.7473860383033752, "bits_per_byte": 1.078250131090703, "num_chars": 2}, {"sum_logits": -1.6690846681594849, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.6690846681594849, "logits_per_char": -0.8345423340797424, "bits_per_byte": 1.20399008678961, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1103, "native_id": "MCAS_2009_8_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2247244119644165, "logits_per_token_corr": -1.2247244119644165, "logits_per_char_corr": -0.6123622059822083, "bits_per_byte_corr": 0.8834519177989925}, "model_output": [{"sum_logits": -1.2247244119644165, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.2247244119644165, "logits_per_char": -0.6123622059822083, "bits_per_byte": 0.8834519177989925, "num_chars": 2}, {"sum_logits": -1.12319815158844, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.12319815158844, "logits_per_char": -0.56159907579422, "bits_per_byte": 0.8102162016167284, "num_chars": 2}, {"sum_logits": -1.4146467447280884, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4146467447280884, "logits_per_char": -0.7073233723640442, "bits_per_byte": 1.0204519216151973, "num_chars": 2}, {"sum_logits": -2.0546154975891113, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -2.0546154975891113, "logits_per_char": -1.0273077487945557, "bits_per_byte": 1.4820917946537753, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1104, "native_id": "MCAS_2005_8_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4941165447235107, "logits_per_token_corr": -1.4941165447235107, "logits_per_char_corr": -0.7470582723617554, "bits_per_byte_corr": 1.0777772647921553}, "model_output": [{"sum_logits": -1.346022367477417, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.346022367477417, "logits_per_char": -0.6730111837387085, "bits_per_byte": 0.9709498972433432, "num_chars": 2}, {"sum_logits": -1.0164477825164795, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.0164477825164795, "logits_per_char": -0.5082238912582397, "bits_per_byte": 0.733212087580081, "num_chars": 2}, {"sum_logits": -1.4941165447235107, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4941165447235107, "logits_per_char": -0.7470582723617554, "bits_per_byte": 1.0777772647921553, "num_chars": 2}, {"sum_logits": -2.0110785961151123, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -2.0110785961151123, "logits_per_char": -1.0055392980575562, "bits_per_byte": 1.4506865587276478, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1105, "native_id": "ACTAAP_2008_7_4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8145290613174438, "logits_per_token_corr": -1.8145290613174438, "logits_per_char_corr": -0.9072645306587219, "bits_per_byte_corr": 1.3089060391567313}, "model_output": [{"sum_logits": -1.2766185998916626, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.2766185998916626, "logits_per_char": -0.6383092999458313, "bits_per_byte": 0.9208856615858183, "num_chars": 2}, {"sum_logits": -1.8145290613174438, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.8145290613174438, "logits_per_char": -0.9072645306587219, "bits_per_byte": 1.3089060391567313, "num_chars": 2}, {"sum_logits": -1.399802803993225, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.399802803993225, "logits_per_char": -0.6999014019966125, "bits_per_byte": 1.009744281772471, "num_chars": 2}, {"sum_logits": -1.504010796546936, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.504010796546936, "logits_per_char": -0.752005398273468, "bits_per_byte": 1.0849144588116415, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1106, "native_id": "NYSEDREGENTS_2008_4_3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3841402530670166, "logits_per_token_corr": -1.3841402530670166, "logits_per_char_corr": -0.6920701265335083, "bits_per_byte_corr": 0.9984461394980072}, "model_output": [{"sum_logits": -1.1889536380767822, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.1889536380767822, "logits_per_char": -0.5944768190383911, "bits_per_byte": 0.8576487587507488, "num_chars": 2}, {"sum_logits": -1.626842737197876, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.626842737197876, "logits_per_char": -0.813421368598938, "bits_per_byte": 1.1735189746316446, "num_chars": 2}, {"sum_logits": -1.4633677005767822, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4633677005767822, "logits_per_char": -0.7316838502883911, "bits_per_byte": 1.055596662310363, "num_chars": 2}, {"sum_logits": -1.3841402530670166, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3841402530670166, "logits_per_char": -0.6920701265335083, "bits_per_byte": 0.9984461394980072, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1107, "native_id": "Mercury_SC_416181", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.157130479812622, "logits_per_token_corr": -1.157130479812622, "logits_per_char_corr": -0.578565239906311, "bits_per_byte_corr": 0.834693202444168}, "model_output": [{"sum_logits": -1.4710915088653564, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4710915088653564, "logits_per_char": -0.7355457544326782, "bits_per_byte": 1.0611682122677186, "num_chars": 2}, {"sum_logits": -1.157130479812622, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.157130479812622, "logits_per_char": -0.578565239906311, "bits_per_byte": 0.834693202444168, "num_chars": 2}, {"sum_logits": -1.3439056873321533, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3439056873321533, "logits_per_char": -0.6719528436660767, "bits_per_byte": 0.9694230352689822, "num_chars": 2}, {"sum_logits": -1.716843843460083, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.716843843460083, "logits_per_char": -0.8584219217300415, "bits_per_byte": 1.2384410494711946, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1108, "native_id": "NYSEDREGENTS_2010_4_30", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.407728672027588, "logits_per_token_corr": -1.407728672027588, "logits_per_char_corr": -0.703864336013794, "bits_per_byte_corr": 1.0154615870264332}, "model_output": [{"sum_logits": -1.01267671585083, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": true, "logits_per_token": -1.01267671585083, "logits_per_char": -0.506338357925415, "bits_per_byte": 0.7304918379913821, "num_chars": 2}, {"sum_logits": -1.407728672027588, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.407728672027588, "logits_per_char": -0.703864336013794, "bits_per_byte": 1.0154615870264332, "num_chars": 2}, {"sum_logits": -1.4729351997375488, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.4729351997375488, "logits_per_char": -0.7364675998687744, "bits_per_byte": 1.0624981541068417, "num_chars": 2}, {"sum_logits": -1.9730896949768066, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.9730896949768066, "logits_per_char": -0.9865448474884033, "bits_per_byte": 1.4232833590871008, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1109, "native_id": "Mercury_7025060", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8207643032073975, "logits_per_token_corr": -1.8207643032073975, "logits_per_char_corr": -0.9103821516036987, "bits_per_byte_corr": 1.3134038154334242}, "model_output": [{"sum_logits": -1.6526696681976318, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.6526696681976318, "logits_per_char": -0.8263348340988159, "bits_per_byte": 1.1921491672690225, "num_chars": 2}, {"sum_logits": -1.2003953456878662, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.2003953456878662, "logits_per_char": -0.6001976728439331, "bits_per_byte": 0.8659022061656608, "num_chars": 2}, {"sum_logits": -1.135751485824585, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.135751485824585, "logits_per_char": -0.5678757429122925, "bits_per_byte": 0.8192715181412888, "num_chars": 2}, {"sum_logits": -1.8207643032073975, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.8207643032073975, "logits_per_char": -0.9103821516036987, "bits_per_byte": 1.3134038154334242, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1110, "native_id": "Mercury_SC_402103", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.1522014141082764, "logits_per_token_corr": -2.1522014141082764, "logits_per_char_corr": -1.0761007070541382, "bits_per_byte_corr": 1.5524851535652278}, "model_output": [{"sum_logits": -1.219456434249878, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.219456434249878, "logits_per_char": -0.609728217124939, "bits_per_byte": 0.8796518751368505, "num_chars": 2}, {"sum_logits": -1.0151126384735107, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.0151126384735107, "logits_per_char": -0.5075563192367554, "bits_per_byte": 0.7322489847352486, "num_chars": 2}, {"sum_logits": -1.5489776134490967, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.5489776134490967, "logits_per_char": -0.7744888067245483, "bits_per_byte": 1.1173511606863196, "num_chars": 2}, {"sum_logits": -2.1522014141082764, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -2.1522014141082764, "logits_per_char": -1.0761007070541382, "bits_per_byte": 1.5524851535652278, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1111, "native_id": "VASoL_2009_5_37", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6012225151062012, "logits_per_token_corr": -1.6012225151062012, "logits_per_char_corr": -0.8006112575531006, "bits_per_byte_corr": 1.1550378909525647}, "model_output": [{"sum_logits": -1.1771903038024902, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.1771903038024902, "logits_per_char": -0.5885951519012451, "bits_per_byte": 0.8491633067398225, "num_chars": 2}, {"sum_logits": -1.1715445518493652, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": true, "logits_per_token": -1.1715445518493652, "logits_per_char": -0.5857722759246826, "bits_per_byte": 0.8450907575673883, "num_chars": 2}, {"sum_logits": -1.6012225151062012, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.6012225151062012, "logits_per_char": -0.8006112575531006, "bits_per_byte": 1.1550378909525647, "num_chars": 2}, {"sum_logits": -1.7887377738952637, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.7887377738952637, "logits_per_char": -0.8943688869476318, "bits_per_byte": 1.2903015579256574, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1112, "native_id": "Mercury_SC_402981", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7276431322097778, "logits_per_token_corr": -1.7276431322097778, "logits_per_char_corr": -0.8638215661048889, "bits_per_byte_corr": 1.2462310896333564}, "model_output": [{"sum_logits": -1.3560858964920044, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3560858964920044, "logits_per_char": -0.6780429482460022, "bits_per_byte": 0.9782091989449422, "num_chars": 2}, {"sum_logits": -1.1500502824783325, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.1500502824783325, "logits_per_char": -0.5750251412391663, "bits_per_byte": 0.8295859196528169, "num_chars": 2}, {"sum_logits": -1.453358769416809, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.453358769416809, "logits_per_char": -0.7266793847084045, "bits_per_byte": 1.0483767446358117, "num_chars": 2}, {"sum_logits": -1.7276431322097778, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.7276431322097778, "logits_per_char": -0.8638215661048889, "bits_per_byte": 1.2462310896333564, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1113, "native_id": "NYSEDREGENTS_2008_8_5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0880218744277954, "logits_per_token_corr": -1.0880218744277954, "logits_per_char_corr": -0.5440109372138977, "bits_per_byte_corr": 0.7848418813084113}, "model_output": [{"sum_logits": -1.3864723443984985, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3864723443984985, "logits_per_char": -0.6932361721992493, "bits_per_byte": 1.000128387797423, "num_chars": 2}, {"sum_logits": -1.0880218744277954, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.0880218744277954, "logits_per_char": -0.5440109372138977, "bits_per_byte": 0.7848418813084113, "num_chars": 2}, {"sum_logits": -1.4747196435928345, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4747196435928345, "logits_per_char": -0.7373598217964172, "bits_per_byte": 1.0637853582572252, "num_chars": 2}, {"sum_logits": -2.1128592491149902, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -2.1128592491149902, "logits_per_char": -1.0564296245574951, "bits_per_byte": 1.5241057803983826, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1114, "native_id": "MCAS_1998_4_13", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.294417142868042, "logits_per_token_corr": -1.294417142868042, "logits_per_char_corr": -0.647208571434021, "bits_per_byte_corr": 0.9337245964293633}, "model_output": [{"sum_logits": -1.3927924633026123, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3927924633026123, "logits_per_char": -0.6963962316513062, "bits_per_byte": 1.004687389897823, "num_chars": 2}, {"sum_logits": -1.294417142868042, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.294417142868042, "logits_per_char": -0.647208571434021, "bits_per_byte": 0.9337245964293633, "num_chars": 2}, {"sum_logits": -1.2498157024383545, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.2498157024383545, "logits_per_char": -0.6249078512191772, "bits_per_byte": 0.9015514579671329, "num_chars": 2}, {"sum_logits": -1.7328994274139404, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.7328994274139404, "logits_per_char": -0.8664497137069702, "bits_per_byte": 1.250022705145606, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1115, "native_id": "MDSA_2008_8_20", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.547985315322876, "logits_per_token_corr": -1.547985315322876, "logits_per_char_corr": -0.773992657661438, "bits_per_byte_corr": 1.116635368893428}, "model_output": [{"sum_logits": -1.1959974765777588, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.1959974765777588, "logits_per_char": -0.5979987382888794, "bits_per_byte": 0.8627298141878432, "num_chars": 2}, {"sum_logits": -1.2035105228424072, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.2035105228424072, "logits_per_char": -0.6017552614212036, "bits_per_byte": 0.8681493314818359, "num_chars": 2}, {"sum_logits": -1.547985315322876, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.547985315322876, "logits_per_char": -0.773992657661438, "bits_per_byte": 1.116635368893428, "num_chars": 2}, {"sum_logits": -1.7396776676177979, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.7396776676177979, "logits_per_char": -0.8698388338088989, "bits_per_byte": 1.2549121719096392, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1116, "native_id": "Mercury_SC_400134", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8383816480636597, "logits_per_token_corr": -1.8383816480636597, "logits_per_char_corr": -0.9191908240318298, "bits_per_byte_corr": 1.3261120434623135}, "model_output": [{"sum_logits": -1.1509026288986206, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.1509026288986206, "logits_per_char": -0.5754513144493103, "bits_per_byte": 0.830200757629652, "num_chars": 2}, {"sum_logits": -1.230326533317566, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.230326533317566, "logits_per_char": -0.615163266658783, "bits_per_byte": 0.8874929941463188, "num_chars": 2}, {"sum_logits": -1.5061081647872925, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.5061081647872925, "logits_per_char": -0.7530540823936462, "bits_per_byte": 1.0864273901912829, "num_chars": 2}, {"sum_logits": -1.8383816480636597, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.8383816480636597, "logits_per_char": -0.9191908240318298, "bits_per_byte": 1.3261120434623135, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1117, "native_id": "Mercury_SC_LBS10265", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6495914459228516, "logits_per_token_corr": -1.6495914459228516, "logits_per_char_corr": -0.8247957229614258, "bits_per_byte_corr": 1.1899286992637312}, "model_output": [{"sum_logits": -1.5577449798583984, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.5577449798583984, "logits_per_char": -0.7788724899291992, "bits_per_byte": 1.1236754787065022, "num_chars": 2}, {"sum_logits": -1.3400802612304688, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": true, "logits_per_token": -1.3400802612304688, "logits_per_char": -0.6700401306152344, "bits_per_byte": 0.9666635736358864, "num_chars": 2}, {"sum_logits": -1.6495914459228516, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.6495914459228516, "logits_per_char": -0.8247957229614258, "bits_per_byte": 1.1899286992637312, "num_chars": 2}, {"sum_logits": -1.678924560546875, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.678924560546875, "logits_per_char": -0.8394622802734375, "bits_per_byte": 1.2110880687646994, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1118, "native_id": "Mercury_7188580", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3116554021835327, "logits_per_token_corr": -1.3116554021835327, "logits_per_char_corr": -0.6558277010917664, "bits_per_byte_corr": 0.9461593720433805}, "model_output": [{"sum_logits": -1.3116554021835327, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.3116554021835327, "logits_per_char": -0.6558277010917664, "bits_per_byte": 0.9461593720433805, "num_chars": 2}, {"sum_logits": -1.0820850133895874, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": true, "logits_per_token": -1.0820850133895874, "logits_per_char": -0.5410425066947937, "bits_per_byte": 0.7805593413192734, "num_chars": 2}, {"sum_logits": -1.3701595067977905, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.3701595067977905, "logits_per_char": -0.6850797533988953, "bits_per_byte": 0.9883611628427302, "num_chars": 2}, {"sum_logits": -2.04850435256958, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -2.04850435256958, "logits_per_char": -1.02425217628479, "bits_per_byte": 1.4776835353468565, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1119, "native_id": "Mercury_402348", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6363983154296875, "logits_per_token_corr": -1.6363983154296875, "logits_per_char_corr": -0.8181991577148438, "bits_per_byte_corr": 1.18041186729558}, "model_output": [{"sum_logits": -1.6363983154296875, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.6363983154296875, "logits_per_char": -0.8181991577148438, "bits_per_byte": 1.18041186729558, "num_chars": 2}, {"sum_logits": -1.2338981628417969, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.2338981628417969, "logits_per_char": -0.6169490814208984, "bits_per_byte": 0.890069380247571, "num_chars": 2}, {"sum_logits": -1.4191722869873047, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4191722869873047, "logits_per_char": -0.7095861434936523, "bits_per_byte": 1.023716410302552, "num_chars": 2}, {"sum_logits": -1.4675846099853516, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4675846099853516, "logits_per_char": -0.7337923049926758, "bits_per_byte": 1.0586385194561756, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1120, "native_id": "Mercury_7030555", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3429096937179565, "logits_per_token_corr": -1.3429096937179565, "logits_per_char_corr": -0.6714548468589783, "bits_per_byte_corr": 0.9687045777450023}, "model_output": [{"sum_logits": -1.2482060194015503, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": true, "logits_per_token": -1.2482060194015503, "logits_per_char": -0.6241030097007751, "bits_per_byte": 0.9003903170998317, "num_chars": 2}, {"sum_logits": -1.3307617902755737, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.3307617902755737, "logits_per_char": -0.6653808951377869, "bits_per_byte": 0.9599417177182341, "num_chars": 2}, {"sum_logits": -1.3429096937179565, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.3429096937179565, "logits_per_char": -0.6714548468589783, "bits_per_byte": 0.9687045777450023, "num_chars": 2}, {"sum_logits": -1.838469386100769, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.838469386100769, "logits_per_char": -0.9192346930503845, "bits_per_byte": 1.326175333077831, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1121, "native_id": "Mercury_SC_415453", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5340368747711182, "logits_per_token_corr": -1.5340368747711182, "logits_per_char_corr": -0.7670184373855591, "bits_per_byte_corr": 1.106573695887343}, "model_output": [{"sum_logits": -1.5340368747711182, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.5340368747711182, "logits_per_char": -0.7670184373855591, "bits_per_byte": 1.106573695887343, "num_chars": 2}, {"sum_logits": -1.562988519668579, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.562988519668579, "logits_per_char": -0.7814942598342896, "bits_per_byte": 1.1274578931469306, "num_chars": 2}, {"sum_logits": -1.2621614933013916, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.2621614933013916, "logits_per_char": -0.6310807466506958, "bits_per_byte": 0.9104570635941173, "num_chars": 2}, {"sum_logits": -1.2902281284332275, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.2902281284332275, "logits_per_char": -0.6451140642166138, "bits_per_byte": 0.9307028612537016, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1122, "native_id": "Mercury_7074848", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4512193202972412, "logits_per_token_corr": -1.4512193202972412, "logits_per_char_corr": -0.7256096601486206, "bits_per_byte_corr": 1.0468334583182932}, "model_output": [{"sum_logits": -1.4512193202972412, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4512193202972412, "logits_per_char": -0.7256096601486206, "bits_per_byte": 1.0468334583182932, "num_chars": 2}, {"sum_logits": -1.1032464504241943, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.1032464504241943, "logits_per_char": -0.5516232252120972, "bits_per_byte": 0.7958240914532402, "num_chars": 2}, {"sum_logits": -1.4474503993988037, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4474503993988037, "logits_per_char": -0.7237251996994019, "bits_per_byte": 1.0441147565734519, "num_chars": 2}, {"sum_logits": -1.6869027614593506, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.6869027614593506, "logits_per_char": -0.8434513807296753, "bits_per_byte": 1.2168431242105258, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1123, "native_id": "Mercury_SC_400582", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2645384073257446, "logits_per_token_corr": -1.2645384073257446, "logits_per_char_corr": -0.6322692036628723, "bits_per_byte_corr": 0.9121716446318953}, "model_output": [{"sum_logits": -1.2645384073257446, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.2645384073257446, "logits_per_char": -0.6322692036628723, "bits_per_byte": 0.9121716446318953, "num_chars": 2}, {"sum_logits": -1.1458269357681274, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.1458269357681274, "logits_per_char": -0.5729134678840637, "bits_per_byte": 0.8265394189754309, "num_chars": 2}, {"sum_logits": -1.4644991159439087, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4644991159439087, "logits_per_char": -0.7322495579719543, "bits_per_byte": 1.056412805980033, "num_chars": 2}, {"sum_logits": -1.9325014352798462, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.9325014352798462, "logits_per_char": -0.9662507176399231, "bits_per_byte": 1.3940051185955207, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1124, "native_id": "Mercury_SC_401168", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4861432313919067, "logits_per_token_corr": -1.4861432313919067, "logits_per_char_corr": -0.7430716156959534, "bits_per_byte_corr": 1.0720257349906717}, "model_output": [{"sum_logits": -1.269863247871399, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.269863247871399, "logits_per_char": -0.6349316239356995, "bits_per_byte": 0.916012705156268, "num_chars": 2}, {"sum_logits": -1.2848149538040161, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.2848149538040161, "logits_per_char": -0.6424074769020081, "bits_per_byte": 0.9267980811571842, "num_chars": 2}, {"sum_logits": -1.4861432313919067, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4861432313919067, "logits_per_char": -0.7430716156959534, "bits_per_byte": 1.0720257349906717, "num_chars": 2}, {"sum_logits": -1.6400681734085083, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.6400681734085083, "logits_per_char": -0.8200340867042542, "bits_per_byte": 1.1830591102489876, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1125, "native_id": "Mercury_180828", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4682999849319458, "logits_per_token_corr": -1.4682999849319458, "logits_per_char_corr": -0.7341499924659729, "bits_per_byte_corr": 1.05915455340009}, "model_output": [{"sum_logits": -1.2551060914993286, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.2551060914993286, "logits_per_char": -0.6275530457496643, "bits_per_byte": 0.9053676669984559, "num_chars": 2}, {"sum_logits": -1.0885316133499146, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.0885316133499146, "logits_per_char": -0.5442658066749573, "bits_per_byte": 0.7852095802159562, "num_chars": 2}, {"sum_logits": -1.4682999849319458, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4682999849319458, "logits_per_char": -0.7341499924659729, "bits_per_byte": 1.05915455340009, "num_chars": 2}, {"sum_logits": -1.9933816194534302, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.9933816194534302, "logits_per_char": -0.9966908097267151, "bits_per_byte": 1.4379208884933703, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1126, "native_id": "FCAT_2008_5_1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3003679513931274, "logits_per_token_corr": -1.3003679513931274, "logits_per_char_corr": -0.6501839756965637, "bits_per_byte_corr": 0.9380171974035767}, "model_output": [{"sum_logits": -1.3003679513931274, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3003679513931274, "logits_per_char": -0.6501839756965637, "bits_per_byte": 0.9380171974035767, "num_chars": 2}, {"sum_logits": -1.1275049448013306, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.1275049448013306, "logits_per_char": -0.5637524724006653, "bits_per_byte": 0.8133228962219163, "num_chars": 2}, {"sum_logits": -1.4046446084976196, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4046446084976196, "logits_per_char": -0.7023223042488098, "bits_per_byte": 1.0132369054461956, "num_chars": 2}, {"sum_logits": -1.9356027841567993, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.9356027841567993, "logits_per_char": -0.9678013920783997, "bits_per_byte": 1.3962422689179457, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1127, "native_id": "TAKS_2009_5_25", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.475030541419983, "logits_per_token_corr": -1.475030541419983, "logits_per_char_corr": -0.7375152707099915, "bits_per_byte_corr": 1.0640096236339505}, "model_output": [{"sum_logits": -1.2822109460830688, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.2822109460830688, "logits_per_char": -0.6411054730415344, "bits_per_byte": 0.9249196866444592, "num_chars": 2}, {"sum_logits": -1.011670708656311, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.011670708656311, "logits_per_char": -0.5058353543281555, "bits_per_byte": 0.7297661571960659, "num_chars": 2}, {"sum_logits": -1.475030541419983, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.475030541419983, "logits_per_char": -0.7375152707099915, "bits_per_byte": 1.0640096236339505, "num_chars": 2}, {"sum_logits": -2.1177687644958496, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -2.1177687644958496, "logits_per_char": -1.0588843822479248, "bits_per_byte": 1.5276472471449523, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1128, "native_id": "Mercury_SC_LBS10392", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.421995997428894, "logits_per_token_corr": -1.421995997428894, "logits_per_char_corr": -0.710997998714447, "bits_per_byte_corr": 1.0257532868280472}, "model_output": [{"sum_logits": -1.421995997428894, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.421995997428894, "logits_per_char": -0.710997998714447, "bits_per_byte": 1.0257532868280472, "num_chars": 2}, {"sum_logits": -1.1077488660812378, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.1077488660812378, "logits_per_char": -0.5538744330406189, "bits_per_byte": 0.7990718978234612, "num_chars": 2}, {"sum_logits": -1.326607584953308, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.326607584953308, "logits_per_char": -0.663303792476654, "bits_per_byte": 0.9569450920095984, "num_chars": 2}, {"sum_logits": -1.8932503461837769, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.8932503461837769, "logits_per_char": -0.9466251730918884, "bits_per_byte": 1.3656914428013052, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1129, "native_id": "Mercury_7212905", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2219982147216797, "logits_per_token_corr": -1.2219982147216797, "logits_per_char_corr": -0.6109991073608398, "bits_per_byte_corr": 0.8814853821777002}, "model_output": [{"sum_logits": -1.1527328491210938, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": true, "logits_per_token": -1.1527328491210938, "logits_per_char": -0.5763664245605469, "bits_per_byte": 0.8315209824490012, "num_chars": 2}, {"sum_logits": -1.2219982147216797, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.2219982147216797, "logits_per_char": -0.6109991073608398, "bits_per_byte": 0.8814853821777002, "num_chars": 2}, {"sum_logits": -1.5015850067138672, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.5015850067138672, "logits_per_char": -0.7507925033569336, "bits_per_byte": 1.0831646213304367, "num_chars": 2}, {"sum_logits": -1.8807315826416016, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.8807315826416016, "logits_per_char": -0.9403657913208008, "bits_per_byte": 1.3566610637611198, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1130, "native_id": "Mercury_7212888", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2833939790725708, "logits_per_token_corr": -1.2833939790725708, "logits_per_char_corr": -0.6416969895362854, "bits_per_byte_corr": 0.9257730645580411}, "model_output": [{"sum_logits": -1.2833939790725708, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.2833939790725708, "logits_per_char": -0.6416969895362854, "bits_per_byte": 0.9257730645580411, "num_chars": 2}, {"sum_logits": -1.2619668245315552, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.2619668245315552, "logits_per_char": -0.6309834122657776, "bits_per_byte": 0.9103166397596877, "num_chars": 2}, {"sum_logits": -1.3476334810256958, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.3476334810256958, "logits_per_char": -0.6738167405128479, "bits_per_byte": 0.9721120700065495, "num_chars": 2}, {"sum_logits": -1.812759518623352, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.812759518623352, "logits_per_char": -0.906379759311676, "bits_per_byte": 1.3076295839220267, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1131, "native_id": "MDSA_2007_8_42", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.037863254547119, "logits_per_token_corr": -2.037863254547119, "logits_per_char_corr": -1.0189316272735596, "bits_per_byte_corr": 1.470007605673542}, "model_output": [{"sum_logits": -1.2577767372131348, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.2577767372131348, "logits_per_char": -0.6288883686065674, "bits_per_byte": 0.9072941306620971, "num_chars": 2}, {"sum_logits": -1.1421704292297363, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": true, "logits_per_token": -1.1421704292297363, "logits_per_char": -0.5710852146148682, "bits_per_byte": 0.8239018070504716, "num_chars": 2}, {"sum_logits": -1.3762078285217285, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.3762078285217285, "logits_per_char": -0.6881039142608643, "bits_per_byte": 0.9927241047211466, "num_chars": 2}, {"sum_logits": -2.037863254547119, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -2.037863254547119, "logits_per_char": -1.0189316272735596, "bits_per_byte": 1.470007605673542, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1132, "native_id": "Mercury_SC_415534", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4055144786834717, "logits_per_token_corr": -1.4055144786834717, "logits_per_char_corr": -0.7027572393417358, "bits_per_byte_corr": 1.013864384147869}, "model_output": [{"sum_logits": -1.3734748363494873, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3734748363494873, "logits_per_char": -0.6867374181747437, "bits_per_byte": 0.9907526675943048, "num_chars": 2}, {"sum_logits": -1.4055144786834717, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4055144786834717, "logits_per_char": -0.7027572393417358, "bits_per_byte": 1.013864384147869, "num_chars": 2}, {"sum_logits": -1.1488215923309326, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.1488215923309326, "logits_per_char": -0.5744107961654663, "bits_per_byte": 0.8286996070615948, "num_chars": 2}, {"sum_logits": -1.8278586864471436, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.8278586864471436, "logits_per_char": -0.9139293432235718, "bits_per_byte": 1.3185213311925017, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1133, "native_id": "Mercury_7213413", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.7716275453567505, "logits_per_token_corr": -1.7716275453567505, "logits_per_char_corr": -0.8858137726783752, "bits_per_byte_corr": 1.2779591369951537}, "model_output": [{"sum_logits": -1.3335648775100708, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.3335648775100708, "logits_per_char": -0.6667824387550354, "bits_per_byte": 0.9619637177444297, "num_chars": 2}, {"sum_logits": -1.1354600191116333, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": true, "logits_per_token": -1.1354600191116333, "logits_per_char": -0.5677300095558167, "bits_per_byte": 0.819061269350609, "num_chars": 2}, {"sum_logits": -1.4579602479934692, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.4579602479934692, "logits_per_char": -0.7289801239967346, "bits_per_byte": 1.0516960097974661, "num_chars": 2}, {"sum_logits": -1.7716275453567505, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.7716275453567505, "logits_per_char": -0.8858137726783752, "bits_per_byte": 1.2779591369951537, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1134, "native_id": "Mercury_7068635", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5685898065567017, "logits_per_token_corr": -1.5685898065567017, "logits_per_char_corr": -0.7842949032783508, "bits_per_byte_corr": 1.1314983675549788}, "model_output": [{"sum_logits": -1.3815323114395142, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3815323114395142, "logits_per_char": -0.6907661557197571, "bits_per_byte": 0.996564907271543, "num_chars": 2}, {"sum_logits": -1.1624046564102173, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.1624046564102173, "logits_per_char": -0.5812023282051086, "bits_per_byte": 0.8384977166552323, "num_chars": 2}, {"sum_logits": -1.5685898065567017, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.5685898065567017, "logits_per_char": -0.7842949032783508, "bits_per_byte": 1.1314983675549788, "num_chars": 2}, {"sum_logits": -1.5751854181289673, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.5751854181289673, "logits_per_char": -0.7875927090644836, "bits_per_byte": 1.136256095608451, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1135, "native_id": "Mercury_417137", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.190603256225586, "logits_per_token_corr": -1.190603256225586, "logits_per_char_corr": -0.595301628112793, "bits_per_byte_corr": 0.8588387067120694}, "model_output": [{"sum_logits": -1.5437755584716797, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.5437755584716797, "logits_per_char": -0.7718877792358398, "bits_per_byte": 1.113598671227141, "num_chars": 2}, {"sum_logits": -1.4308338165283203, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.4308338165283203, "logits_per_char": -0.7154169082641602, "bits_per_byte": 1.03212842572156, "num_chars": 2}, {"sum_logits": -1.190603256225586, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": true, "logits_per_token": -1.190603256225586, "logits_per_char": -0.595301628112793, "bits_per_byte": 0.8588387067120694, "num_chars": 2}, {"sum_logits": -1.4761581420898438, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.4761581420898438, "logits_per_char": -0.7380790710449219, "bits_per_byte": 1.0648230155812066, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1136, "native_id": "Mercury_7268258", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4286468029022217, "logits_per_token_corr": -1.4286468029022217, "logits_per_char_corr": -0.7143234014511108, "bits_per_byte_corr": 1.0305508288651941}, "model_output": [{"sum_logits": -1.247643232345581, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.247643232345581, "logits_per_char": -0.6238216161727905, "bits_per_byte": 0.8999843520524698, "num_chars": 2}, {"sum_logits": -1.3346564769744873, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3346564769744873, "logits_per_char": -0.6673282384872437, "bits_per_byte": 0.9627511403114056, "num_chars": 2}, {"sum_logits": -1.4286468029022217, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4286468029022217, "logits_per_char": -0.7143234014511108, "bits_per_byte": 1.0305508288651941, "num_chars": 2}, {"sum_logits": -1.6544253826141357, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.6544253826141357, "logits_per_char": -0.8272126913070679, "bits_per_byte": 1.193415647509977, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1137, "native_id": "NAEP_2005_4_S13+14", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.5153820514678955, "logits_per_token_corr": -1.5153820514678955, "logits_per_char_corr": -0.7576910257339478, "bits_per_byte_corr": 1.0931170853532237}, "model_output": [{"sum_logits": -1.8428070545196533, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.8428070545196533, "logits_per_char": -0.9214035272598267, "bits_per_byte": 1.3293042994363058, "num_chars": 2}, {"sum_logits": -0.7102540135383606, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": true, "logits_per_token": -0.7102540135383606, "logits_per_char": -0.3551270067691803, "bits_per_byte": 0.5123399715520058, "num_chars": 2}, {"sum_logits": -1.5153820514678955, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.5153820514678955, "logits_per_char": -0.7576910257339478, "bits_per_byte": 1.0931170853532237, "num_chars": 2}, {"sum_logits": -2.3454067707061768, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -2.3454067707061768, "logits_per_char": -1.1727033853530884, "bits_per_byte": 1.6918533584838151, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1138, "native_id": "Mercury_SC_406089", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8890645503997803, "logits_per_token_corr": -1.8890645503997803, "logits_per_char_corr": -0.9445322751998901, "bits_per_byte_corr": 1.36267202939143}, "model_output": [{"sum_logits": -1.3409898281097412, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3409898281097412, "logits_per_char": -0.6704949140548706, "bits_per_byte": 0.9673196874489285, "num_chars": 2}, {"sum_logits": -1.1213891506195068, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.1213891506195068, "logits_per_char": -0.5606945753097534, "bits_per_byte": 0.8089112832533059, "num_chars": 2}, {"sum_logits": -1.3720204830169678, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3720204830169678, "logits_per_char": -0.6860102415084839, "bits_per_byte": 0.9897035734240409, "num_chars": 2}, {"sum_logits": -1.8890645503997803, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.8890645503997803, "logits_per_char": -0.9445322751998901, "bits_per_byte": 1.36267202939143, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1139, "native_id": "Mercury_SC_400700", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4531019926071167, "logits_per_token_corr": -1.4531019926071167, "logits_per_char_corr": -0.7265509963035583, "bits_per_byte_corr": 1.0481915193208322}, "model_output": [{"sum_logits": -1.2758654356002808, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.2758654356002808, "logits_per_char": -0.6379327178001404, "bits_per_byte": 0.9203423683917423, "num_chars": 2}, {"sum_logits": -1.0431135892868042, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.0431135892868042, "logits_per_char": -0.5215567946434021, "bits_per_byte": 0.7524474011745202, "num_chars": 2}, {"sum_logits": -1.4531019926071167, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4531019926071167, "logits_per_char": -0.7265509963035583, "bits_per_byte": 1.0481915193208322, "num_chars": 2}, {"sum_logits": -2.098104476928711, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -2.098104476928711, "logits_per_char": -1.0490522384643555, "bits_per_byte": 1.5134624620670791, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1140, "native_id": "Mercury_7223493", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.176027774810791, "logits_per_token_corr": -1.176027774810791, "logits_per_char_corr": -0.5880138874053955, "bits_per_byte_corr": 0.8483247193342148}, "model_output": [{"sum_logits": -1.176027774810791, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.176027774810791, "logits_per_char": -0.5880138874053955, "bits_per_byte": 0.8483247193342148, "num_chars": 2}, {"sum_logits": -1.0422120094299316, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.0422120094299316, "logits_per_char": -0.5211060047149658, "bits_per_byte": 0.7517970487802821, "num_chars": 2}, {"sum_logits": -1.6097359657287598, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.6097359657287598, "logits_per_char": -0.8048679828643799, "bits_per_byte": 1.1611790474495782, "num_chars": 2}, {"sum_logits": -2.0602803230285645, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -2.0602803230285645, "logits_per_char": -1.0301401615142822, "bits_per_byte": 1.4861781024382785, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1141, "native_id": "Mercury_SC_405928", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6822818517684937, "logits_per_token_corr": -1.6822818517684937, "logits_per_char_corr": -0.8411409258842468, "bits_per_byte_corr": 1.213509842462826}, "model_output": [{"sum_logits": -1.4938758611679077, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4938758611679077, "logits_per_char": -0.7469379305839539, "bits_per_byte": 1.077603648306109, "num_chars": 2}, {"sum_logits": -1.1066535711288452, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.1066535711288452, "logits_per_char": -0.5533267855644226, "bits_per_byte": 0.7982818095253968, "num_chars": 2}, {"sum_logits": -1.3938430547714233, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3938430547714233, "logits_per_char": -0.6969215273857117, "bits_per_byte": 1.0054452314488505, "num_chars": 2}, {"sum_logits": -1.6822818517684937, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.6822818517684937, "logits_per_char": -0.8411409258842468, "bits_per_byte": 1.213509842462826, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1142, "native_id": "MCAS_2009_5_6518", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.056589126586914, "logits_per_token_corr": -2.056589126586914, "logits_per_char_corr": -1.028294563293457, "bits_per_byte_corr": 1.4835154670376185}, "model_output": [{"sum_logits": -1.03668212890625, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.03668212890625, "logits_per_char": -0.518341064453125, "bits_per_byte": 0.7478080831761672, "num_chars": 2}, {"sum_logits": -1.3039741516113281, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3039741516113281, "logits_per_char": -0.6519870758056641, "bits_per_byte": 0.940618520989204, "num_chars": 2}, {"sum_logits": -1.4556198120117188, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4556198120117188, "logits_per_char": -0.7278099060058594, "bits_per_byte": 1.0500077421052703, "num_chars": 2}, {"sum_logits": -2.056589126586914, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -2.056589126586914, "logits_per_char": -1.028294563293457, "bits_per_byte": 1.4835154670376185, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1143, "native_id": "MCAS_2006_9_1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.0538626909255981, "logits_per_token_corr": -1.0538626909255981, "logits_per_char_corr": -0.5269313454627991, "bits_per_byte_corr": 0.7602012389886755}, "model_output": [{"sum_logits": -1.4256163835525513, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.4256163835525513, "logits_per_char": -0.7128081917762756, "bits_per_byte": 1.0283648433814008, "num_chars": 2}, {"sum_logits": -1.0538626909255981, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": true, "logits_per_token": -1.0538626909255981, "logits_per_char": -0.5269313454627991, "bits_per_byte": 0.7602012389886755, "num_chars": 2}, {"sum_logits": -1.3257533311843872, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.3257533311843872, "logits_per_char": -0.6628766655921936, "bits_per_byte": 0.9563288781715565, "num_chars": 2}, {"sum_logits": -2.0008859634399414, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -2.0008859634399414, "logits_per_char": -1.0004429817199707, "bits_per_byte": 1.4433341284206065, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1144, "native_id": "Mercury_7239383", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.046126127243042, "logits_per_token_corr": -1.046126127243042, "logits_per_char_corr": -0.523063063621521, "bits_per_byte_corr": 0.7546204879594989}, "model_output": [{"sum_logits": -1.4662806987762451, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4662806987762451, "logits_per_char": -0.7331403493881226, "bits_per_byte": 1.0576979463386063, "num_chars": 2}, {"sum_logits": -1.046126127243042, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.046126127243042, "logits_per_char": -0.523063063621521, "bits_per_byte": 0.7546204879594989, "num_chars": 2}, {"sum_logits": -1.322808027267456, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.322808027267456, "logits_per_char": -0.661404013633728, "bits_per_byte": 0.9542042904941213, "num_chars": 2}, {"sum_logits": -1.9634425640106201, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.9634425640106201, "logits_per_char": -0.9817212820053101, "bits_per_byte": 1.4163244250852338, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1145, "native_id": "Mercury_SC_400130", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.160954475402832, "logits_per_token_corr": -1.160954475402832, "logits_per_char_corr": -0.580477237701416, "bits_per_byte_corr": 0.8374516321813585}, "model_output": [{"sum_logits": -1.1372308731079102, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.1372308731079102, "logits_per_char": -0.5686154365539551, "bits_per_byte": 0.8203386704898934, "num_chars": 2}, {"sum_logits": -1.160954475402832, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.160954475402832, "logits_per_char": -0.580477237701416, "bits_per_byte": 0.8374516321813585, "num_chars": 2}, {"sum_logits": -1.505436897277832, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.505436897277832, "logits_per_char": -0.752718448638916, "bits_per_byte": 1.0859431730377782, "num_chars": 2}, {"sum_logits": -2.088089942932129, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -2.088089942932129, "logits_per_char": -1.0440449714660645, "bits_per_byte": 1.5062385028002325, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1146, "native_id": "Mercury_401426", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.61046302318573, "logits_per_token_corr": -1.61046302318573, "logits_per_char_corr": -0.805231511592865, "bits_per_byte_corr": 1.1617035085433849}, "model_output": [{"sum_logits": -1.3447388410568237, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3447388410568237, "logits_per_char": -0.6723694205284119, "bits_per_byte": 0.9700240286424227, "num_chars": 2}, {"sum_logits": -1.2974919080734253, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.2974919080734253, "logits_per_char": -0.6487459540367126, "bits_per_byte": 0.9359425706862171, "num_chars": 2}, {"sum_logits": -1.3687695264816284, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3687695264816284, "logits_per_char": -0.6843847632408142, "bits_per_byte": 0.9873585039881994, "num_chars": 2}, {"sum_logits": -1.61046302318573, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.61046302318573, "logits_per_char": -0.805231511592865, "bits_per_byte": 1.1617035085433849, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1147, "native_id": "MCAS_2010_8_12016", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4206037521362305, "logits_per_token_corr": -1.4206037521362305, "logits_per_char_corr": -0.7103018760681152, "bits_per_byte_corr": 1.0247489941383332}, "model_output": [{"sum_logits": -1.426987648010254, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.426987648010254, "logits_per_char": -0.713493824005127, "bits_per_byte": 1.029354001597839, "num_chars": 2}, {"sum_logits": -1.107020378112793, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": true, "logits_per_token": -1.107020378112793, "logits_per_char": -0.5535101890563965, "bits_per_byte": 0.7985464048337495, "num_chars": 2}, {"sum_logits": -1.4206037521362305, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.4206037521362305, "logits_per_char": -0.7103018760681152, "bits_per_byte": 1.0247489941383332, "num_chars": 2}, {"sum_logits": -1.769679069519043, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.769679069519043, "logits_per_char": -0.8848395347595215, "bits_per_byte": 1.2765536087809763, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1148, "native_id": "Mercury_SC_400324", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1339374780654907, "logits_per_token_corr": -1.1339374780654907, "logits_per_char_corr": -0.5669687390327454, "bits_per_byte_corr": 0.8179629881421983}, "model_output": [{"sum_logits": -1.3059536218643188, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.3059536218643188, "logits_per_char": -0.6529768109321594, "bits_per_byte": 0.9420464069479935, "num_chars": 2}, {"sum_logits": -1.1339374780654907, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.1339374780654907, "logits_per_char": -0.5669687390327454, "bits_per_byte": 0.8179629881421983, "num_chars": 2}, {"sum_logits": -1.318933129310608, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.318933129310608, "logits_per_char": -0.659466564655304, "bits_per_byte": 0.9514091424609715, "num_chars": 2}, {"sum_logits": -2.086902141571045, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -2.086902141571045, "logits_per_char": -1.0434510707855225, "bits_per_byte": 1.5053816852336335, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1149, "native_id": "Mercury_SC_LBS10662", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6243507862091064, "logits_per_token_corr": -1.6243507862091064, "logits_per_char_corr": -0.8121753931045532, "bits_per_byte_corr": 1.1717214119648252}, "model_output": [{"sum_logits": -1.1954166889190674, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.1954166889190674, "logits_per_char": -0.5977083444595337, "bits_per_byte": 0.8623108644503411, "num_chars": 2}, {"sum_logits": -1.1223862171173096, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.1223862171173096, "logits_per_char": -0.5611931085586548, "bits_per_byte": 0.8096305146992147, "num_chars": 2}, {"sum_logits": -1.6243507862091064, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.6243507862091064, "logits_per_char": -0.8121753931045532, "bits_per_byte": 1.1717214119648252, "num_chars": 2}, {"sum_logits": -1.8435585498809814, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.8435585498809814, "logits_per_char": -0.9217792749404907, "bits_per_byte": 1.3298463887518257, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1150, "native_id": "VASoL_2009_3_8", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4890310764312744, "logits_per_token_corr": -1.4890310764312744, "logits_per_char_corr": -0.7445155382156372, "bits_per_byte_corr": 1.074108874849249}, "model_output": [{"sum_logits": -1.3691160678863525, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3691160678863525, "logits_per_char": -0.6845580339431763, "bits_per_byte": 0.9876084807712286, "num_chars": 2}, {"sum_logits": -1.4696028232574463, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4696028232574463, "logits_per_char": -0.7348014116287231, "bits_per_byte": 1.0600943525957305, "num_chars": 2}, {"sum_logits": -1.3069937229156494, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.3069937229156494, "logits_per_char": -0.6534968614578247, "bits_per_byte": 0.942796681262383, "num_chars": 2}, {"sum_logits": -1.4890310764312744, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4890310764312744, "logits_per_char": -0.7445155382156372, "bits_per_byte": 1.074108874849249, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1151, "native_id": "Mercury_SC_401185", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3833792209625244, "logits_per_token_corr": -1.3833792209625244, "logits_per_char_corr": -0.6916896104812622, "bits_per_byte_corr": 0.9978971708764527}, "model_output": [{"sum_logits": -1.2534716129302979, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.2534716129302979, "logits_per_char": -0.6267358064651489, "bits_per_byte": 0.904188639935465, "num_chars": 2}, {"sum_logits": -1.1252329349517822, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": true, "logits_per_token": -1.1252329349517822, "logits_per_char": -0.5626164674758911, "bits_per_byte": 0.8116839875505181, "num_chars": 2}, {"sum_logits": -1.3833792209625244, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.3833792209625244, "logits_per_char": -0.6916896104812622, "bits_per_byte": 0.9978971708764527, "num_chars": 2}, {"sum_logits": -2.0499441623687744, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -2.0499441623687744, "logits_per_char": -1.0249720811843872, "bits_per_byte": 1.478722138575418, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1152, "native_id": "NYSEDREGENTS_2015_8_29", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4298298358917236, "logits_per_token_corr": -1.4298298358917236, "logits_per_char_corr": -0.7149149179458618, "bits_per_byte_corr": 1.031404206778776}, "model_output": [{"sum_logits": -1.3459045886993408, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3459045886993408, "logits_per_char": -0.6729522943496704, "bits_per_byte": 0.970864937813817, "num_chars": 2}, {"sum_logits": -1.2657921314239502, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2657921314239502, "logits_per_char": -0.6328960657119751, "bits_per_byte": 0.913076015401458, "num_chars": 2}, {"sum_logits": -1.4298298358917236, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4298298358917236, "logits_per_char": -0.7149149179458618, "bits_per_byte": 1.031404206778776, "num_chars": 2}, {"sum_logits": -1.6004154682159424, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.6004154682159424, "logits_per_char": -0.8002077341079712, "bits_per_byte": 1.1544557296793936, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1153, "native_id": "Mercury_7234378", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6517961025238037, "logits_per_token_corr": -1.6517961025238037, "logits_per_char_corr": -0.8258980512619019, "bits_per_byte_corr": 1.1915190228362607}, "model_output": [{"sum_logits": -1.5187928676605225, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.5187928676605225, "logits_per_char": -0.7593964338302612, "bits_per_byte": 1.0955774691564688, "num_chars": 2}, {"sum_logits": -1.1909005641937256, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.1909005641937256, "logits_per_char": -0.5954502820968628, "bits_per_byte": 0.8590531690776955, "num_chars": 2}, {"sum_logits": -1.293811559677124, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.293811559677124, "logits_per_char": -0.646905779838562, "bits_per_byte": 0.9332877604961715, "num_chars": 2}, {"sum_logits": -1.6517961025238037, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.6517961025238037, "logits_per_char": -0.8258980512619019, "bits_per_byte": 1.1915190228362607, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1154, "native_id": "ACTAAP_2014_7_3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -2.250837564468384, "logits_per_token_corr": -2.250837564468384, "logits_per_char_corr": -1.125418782234192, "bits_per_byte_corr": 1.6236360960537313}, "model_output": [{"sum_logits": -1.4839613437652588, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.4839613437652588, "logits_per_char": -0.7419806718826294, "bits_per_byte": 1.0704518357612995, "num_chars": 2}, {"sum_logits": -0.8847773671150208, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": true, "logits_per_token": -0.8847773671150208, "logits_per_char": -0.4423886835575104, "bits_per_byte": 0.6382319599142756, "num_chars": 2}, {"sum_logits": -1.412996530532837, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.412996530532837, "logits_per_char": -0.7064982652664185, "bits_per_byte": 1.0192615436972496, "num_chars": 2}, {"sum_logits": -2.250837564468384, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -2.250837564468384, "logits_per_char": -1.125418782234192, "bits_per_byte": 1.6236360960537313, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1155, "native_id": "MDSA_2008_8_27", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.8185213804244995, "logits_per_token_corr": -1.8185213804244995, "logits_per_char_corr": -0.9092606902122498, "bits_per_byte_corr": 1.3117858886454312}, "model_output": [{"sum_logits": -1.5410155057907104, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.5410155057907104, "logits_per_char": -0.7705077528953552, "bits_per_byte": 1.1116077140694265, "num_chars": 2}, {"sum_logits": -1.2628878355026245, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.2628878355026245, "logits_per_char": -0.6314439177513123, "bits_per_byte": 0.9109810087399711, "num_chars": 2}, {"sum_logits": -1.111841082572937, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.111841082572937, "logits_per_char": -0.5559205412864685, "bits_per_byte": 0.8020238080428725, "num_chars": 2}, {"sum_logits": -1.8185213804244995, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.8185213804244995, "logits_per_char": -0.9092606902122498, "bits_per_byte": 1.3117858886454312, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1156, "native_id": "Mercury_7004725", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2042350769042969, "logits_per_token_corr": -1.2042350769042969, "logits_per_char_corr": -0.6021175384521484, "bits_per_byte_corr": 0.8686719867578083}, "model_output": [{"sum_logits": -1.2042350769042969, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.2042350769042969, "logits_per_char": -0.6021175384521484, "bits_per_byte": 0.8686719867578083, "num_chars": 2}, {"sum_logits": -1.3140830993652344, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3140830993652344, "logits_per_char": -0.6570415496826172, "bits_per_byte": 0.9479105853857923, "num_chars": 2}, {"sum_logits": -1.3241844177246094, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3241844177246094, "logits_per_char": -0.6620922088623047, "bits_per_byte": 0.955197146337553, "num_chars": 2}, {"sum_logits": -1.8964080810546875, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.8964080810546875, "logits_per_char": -0.9482040405273438, "bits_per_byte": 1.3679692670206594, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1157, "native_id": "Mercury_405143", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.297243595123291, "logits_per_token_corr": -1.297243595123291, "logits_per_char_corr": -0.6486217975616455, "bits_per_byte_corr": 0.9357634507553434}, "model_output": [{"sum_logits": -1.5299344062805176, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.5299344062805176, "logits_per_char": -0.7649672031402588, "bits_per_byte": 1.1036143904139446, "num_chars": 2}, {"sum_logits": -1.263979434967041, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.263979434967041, "logits_per_char": -0.6319897174835205, "bits_per_byte": 0.9117684313069471, "num_chars": 2}, {"sum_logits": -1.297243595123291, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.297243595123291, "logits_per_char": -0.6486217975616455, "bits_per_byte": 0.9357634507553434, "num_chars": 2}, {"sum_logits": -1.5380616188049316, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.5380616188049316, "logits_per_char": -0.7690308094024658, "bits_per_byte": 1.10947693501656, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1158, "native_id": "MCAS_2003_8_7", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.3921303749084473, "logits_per_token_corr": -1.3921303749084473, "logits_per_char_corr": -0.6960651874542236, "bits_per_byte_corr": 1.0042097940763766}, "model_output": [{"sum_logits": -1.3921303749084473, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3921303749084473, "logits_per_char": -0.6960651874542236, "bits_per_byte": 1.0042097940763766, "num_chars": 2}, {"sum_logits": -1.3726105690002441, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3726105690002441, "logits_per_char": -0.6863052845001221, "bits_per_byte": 0.9901292304849266, "num_chars": 2}, {"sum_logits": -1.278719425201416, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.278719425201416, "logits_per_char": -0.639359712600708, "bits_per_byte": 0.922401086713897, "num_chars": 2}, {"sum_logits": -1.601768970489502, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.601768970489502, "logits_per_char": -0.800884485244751, "bits_per_byte": 1.1554320751883425, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1159, "native_id": "Mercury_SC_405341", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.362156867980957, "logits_per_token_corr": -1.362156867980957, "logits_per_char_corr": -0.6810784339904785, "bits_per_byte_corr": 0.9825884791751905}, "model_output": [{"sum_logits": -1.362156867980957, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.362156867980957, "logits_per_char": -0.6810784339904785, "bits_per_byte": 0.9825884791751905, "num_chars": 2}, {"sum_logits": -1.1937150955200195, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.1937150955200195, "logits_per_char": -0.5968575477600098, "bits_per_byte": 0.8610834242711324, "num_chars": 2}, {"sum_logits": -1.3311281204223633, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3311281204223633, "logits_per_char": -0.6655640602111816, "bits_per_byte": 0.9602059690612851, "num_chars": 2}, {"sum_logits": -1.7996110916137695, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.7996110916137695, "logits_per_char": -0.8998055458068848, "bits_per_byte": 1.2981449987009124, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1160, "native_id": "Mercury_7283833", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2020577192306519, "logits_per_token_corr": -1.2020577192306519, "logits_per_char_corr": -0.6010288596153259, "bits_per_byte_corr": 0.8671013551988026}, "model_output": [{"sum_logits": -1.4164646863937378, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4164646863937378, "logits_per_char": -0.7082323431968689, "bits_per_byte": 1.0217632893280273, "num_chars": 2}, {"sum_logits": -1.2020577192306519, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.2020577192306519, "logits_per_char": -0.6010288596153259, "bits_per_byte": 0.8671013551988026, "num_chars": 2}, {"sum_logits": -1.327158808708191, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.327158808708191, "logits_per_char": -0.6635794043540955, "bits_per_byte": 0.9573427158983935, "num_chars": 2}, {"sum_logits": -1.73289954662323, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.73289954662323, "logits_per_char": -0.866449773311615, "bits_per_byte": 1.2500227911369315, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1161, "native_id": "Mercury_7159303", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.6229227781295776, "logits_per_token_corr": -1.6229227781295776, "logits_per_char_corr": -0.8114613890647888, "bits_per_byte_corr": 1.1706913218774817}, "model_output": [{"sum_logits": -1.6229227781295776, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.6229227781295776, "logits_per_char": -0.8114613890647888, "bits_per_byte": 1.1706913218774817, "num_chars": 2}, {"sum_logits": -1.3583124876022339, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.3583124876022339, "logits_per_char": -0.6791562438011169, "bits_per_byte": 0.9798153449213511, "num_chars": 2}, {"sum_logits": -1.4053858518600464, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4053858518600464, "logits_per_char": -0.7026929259300232, "bits_per_byte": 1.0137715995077285, "num_chars": 2}, {"sum_logits": -1.3144015073776245, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -1.3144015073776245, "logits_per_char": -0.6572007536888123, "bits_per_byte": 0.9481402682160197, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1162, "native_id": "Mercury_406427", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2867521047592163, "logits_per_token_corr": -1.2867521047592163, "logits_per_char_corr": -0.6433760523796082, "bits_per_byte_corr": 0.9281954401954455}, "model_output": [{"sum_logits": -1.4646581411361694, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.4646581411361694, "logits_per_char": -0.7323290705680847, "bits_per_byte": 1.0565275184081586, "num_chars": 2}, {"sum_logits": -1.1124128103256226, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.1124128103256226, "logits_per_char": -0.5562064051628113, "bits_per_byte": 0.8024362224396419, "num_chars": 2}, {"sum_logits": -1.2867521047592163, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.2867521047592163, "logits_per_char": -0.6433760523796082, "bits_per_byte": 0.9281954401954455, "num_chars": 2}, {"sum_logits": -1.9104856252670288, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.9104856252670288, "logits_per_char": -0.9552428126335144, "bits_per_byte": 1.3781240686321865, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1163, "native_id": "Mercury_SC_414129", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.401711344718933, "logits_per_token_corr": -1.401711344718933, "logits_per_char_corr": -0.7008556723594666, "bits_per_byte_corr": 1.011121002892629}, "model_output": [{"sum_logits": -1.436135172843933, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.436135172843933, "logits_per_char": -0.7180675864219666, "bits_per_byte": 1.0359525459548227, "num_chars": 2}, {"sum_logits": -1.401711344718933, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.401711344718933, "logits_per_char": -0.7008556723594666, "bits_per_byte": 1.011121002892629, "num_chars": 2}, {"sum_logits": -1.2117966413497925, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.2117966413497925, "logits_per_char": -0.6058983206748962, "bits_per_byte": 0.8741265025212517, "num_chars": 2}, {"sum_logits": -1.6182602643966675, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.6182602643966675, "logits_per_char": -0.8091301321983337, "bits_per_byte": 1.1673280291572061, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1164, "native_id": "Mercury_7108990", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.160895824432373, "logits_per_token_corr": -1.160895824432373, "logits_per_char_corr": -0.5804479122161865, "bits_per_byte_corr": 0.8374093244492463}, "model_output": [{"sum_logits": -1.160895824432373, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.160895824432373, "logits_per_char": -0.5804479122161865, "bits_per_byte": 0.8374093244492463, "num_chars": 2}, {"sum_logits": -1.155900478363037, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.155900478363037, "logits_per_char": -0.5779502391815186, "bits_per_byte": 0.8338059439483662, "num_chars": 2}, {"sum_logits": -1.5172991752624512, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.5172991752624512, "logits_per_char": -0.7586495876312256, "bits_per_byte": 1.0944999978488126, "num_chars": 2}, {"sum_logits": -2.0027403831481934, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -2.0027403831481934, "logits_per_char": -1.0013701915740967, "bits_per_byte": 1.4446718094790185, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1165, "native_id": "Mercury_SC_407315", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2136404514312744, "logits_per_token_corr": -1.2136404514312744, "logits_per_char_corr": -0.6068202257156372, "bits_per_byte_corr": 0.8754565303517002}, "model_output": [{"sum_logits": -1.2136404514312744, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.2136404514312744, "logits_per_char": -0.6068202257156372, "bits_per_byte": 0.8754565303517002, "num_chars": 2}, {"sum_logits": -0.9620591998100281, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -0.9620591998100281, "logits_per_char": -0.48102959990501404, "bits_per_byte": 0.6939790183042646, "num_chars": 2}, {"sum_logits": -1.5670320987701416, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.5670320987701416, "logits_per_char": -0.7835160493850708, "bits_per_byte": 1.130374718905566, "num_chars": 2}, {"sum_logits": -2.2571260929107666, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -2.2571260929107666, "logits_per_char": -1.1285630464553833, "bits_per_byte": 1.628172310452892, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1166, "native_id": "Mercury_SC_408663", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -0.9777332544326782, "logits_per_token_corr": -0.9777332544326782, "logits_per_char_corr": -0.4888666272163391, "bits_per_byte_corr": 0.7052854587416327}, "model_output": [{"sum_logits": -1.387658715248108, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.387658715248108, "logits_per_char": -0.693829357624054, "bits_per_byte": 1.0009841734681169, "num_chars": 2}, {"sum_logits": -0.9777332544326782, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": true, "logits_per_token": -0.9777332544326782, "logits_per_char": -0.4888666272163391, "bits_per_byte": 0.7052854587416327, "num_chars": 2}, {"sum_logits": -1.4714542627334595, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.4714542627334595, "logits_per_char": -0.7357271313667297, "bits_per_byte": 1.0614298838710066, "num_chars": 2}, {"sum_logits": -2.1069254875183105, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -2.1069254875183105, "logits_per_char": -1.0534627437591553, "bits_per_byte": 1.519825476183706, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1167, "native_id": "MEA_2013_8_18", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.1089379787445068, "logits_per_token_corr": -1.1089379787445068, "logits_per_char_corr": -0.5544689893722534, "bits_per_byte_corr": 0.79992966129464}, "model_output": [{"sum_logits": -1.1089379787445068, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.1089379787445068, "logits_per_char": -0.5544689893722534, "bits_per_byte": 0.79992966129464, "num_chars": 2}, {"sum_logits": -1.2026269435882568, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.2026269435882568, "logits_per_char": -0.6013134717941284, "bits_per_byte": 0.8675119637777379, "num_chars": 2}, {"sum_logits": -1.4821622371673584, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4821622371673584, "logits_per_char": -0.7410811185836792, "bits_per_byte": 1.069154054677888, "num_chars": 2}, {"sum_logits": -2.0153825283050537, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -2.0153825283050537, "logits_per_char": -1.0076912641525269, "bits_per_byte": 1.4537911895410254, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1168, "native_id": "Mercury_7111125", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4590551853179932, "logits_per_token_corr": -1.4590551853179932, "logits_per_char_corr": -0.7295275926589966, "bits_per_byte_corr": 1.0524858401215542}, "model_output": [{"sum_logits": -1.4590551853179932, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4590551853179932, "logits_per_char": -0.7295275926589966, "bits_per_byte": 1.0524858401215542, "num_chars": 2}, {"sum_logits": -1.15248703956604, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.15248703956604, "logits_per_char": -0.57624351978302, "bits_per_byte": 0.8313436683359615, "num_chars": 2}, {"sum_logits": -1.3045942783355713, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3045942783355713, "logits_per_char": -0.6522971391677856, "bits_per_byte": 0.9410658478640984, "num_chars": 2}, {"sum_logits": -1.7925207614898682, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.7925207614898682, "logits_per_char": -0.8962603807449341, "bits_per_byte": 1.2930304066468996, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1169, "native_id": "LEAP_2009_8_10430", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.4352171421051025, "logits_per_token_corr": -1.4352171421051025, "logits_per_char_corr": -0.7176085710525513, "bits_per_byte_corr": 1.0352903267576747}, "model_output": [{"sum_logits": -1.4352171421051025, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.4352171421051025, "logits_per_char": -0.7176085710525513, "bits_per_byte": 1.0352903267576747, "num_chars": 2}, {"sum_logits": -1.01277756690979, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.01277756690979, "logits_per_char": -0.506388783454895, "bits_per_byte": 0.7305645866526971, "num_chars": 2}, {"sum_logits": -1.3307592868804932, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.3307592868804932, "logits_per_char": -0.6653796434402466, "bits_per_byte": 0.9599399119004001, "num_chars": 2}, {"sum_logits": -2.0757429599761963, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -2.0757429599761963, "logits_per_char": -1.0378714799880981, "bits_per_byte": 1.4973320372599939, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1170, "native_id": "Mercury_7165218", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.711380124092102, "logits_per_token_corr": -1.711380124092102, "logits_per_char_corr": -0.855690062046051, "bits_per_byte_corr": 1.2344998090526942}, "model_output": [{"sum_logits": -1.5115433931350708, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.5115433931350708, "logits_per_char": -0.7557716965675354, "bits_per_byte": 1.090348078683005, "num_chars": 2}, {"sum_logits": -1.1614590883255005, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.1614590883255005, "logits_per_char": -0.5807295441627502, "bits_per_byte": 0.83781563346191, "num_chars": 2}, {"sum_logits": -1.4136563539505005, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4136563539505005, "logits_per_char": -0.7068281769752502, "bits_per_byte": 1.0197375056835127, "num_chars": 2}, {"sum_logits": -1.711380124092102, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.711380124092102, "logits_per_char": -0.855690062046051, "bits_per_byte": 1.2344998090526942, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1171, "native_id": "MEA_2013_8_15", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": null, "no_answer": 0, "sum_logits_corr": -1.2801154851913452, "logits_per_token_corr": -1.2801154851913452, "logits_per_char_corr": -0.6400577425956726, "bits_per_byte_corr": 0.9234081311260249}, "model_output": [{"sum_logits": -1.2801154851913452, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.2801154851913452, "logits_per_char": -0.6400577425956726, "bits_per_byte": 0.9234081311260249, "num_chars": 2}, {"sum_logits": -1.2039340734481812, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.2039340734481812, "logits_per_char": -0.6019670367240906, "bits_per_byte": 0.868454858661094, "num_chars": 2}, {"sum_logits": -1.319820761680603, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.319820761680603, "logits_per_char": -0.6599103808403015, "bits_per_byte": 0.9520494338701342, "num_chars": 2}, {"sum_logits": -1.9548534154891968, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.9548534154891968, "logits_per_char": -0.9774267077445984, "bits_per_byte": 1.4101286640965713, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"}