{ "meta": { "model": "meta-llama/Llama-2-7b-chat-hf", "device": "cuda", "dtype": "fp32", "layer": 10, "task": "aqua", "eval_meta": { "subspace_split": null, "eval_split": "test", "available_splits": [ "train", "test", "validation" ], "hf_id": "aqua_rat", "options_prefix_stripped": true, "force_answer_prefix": true }, "candidate_labels": [ "A", "B", "C", "D", "E" ], "candidate_text_style": "space_letter", "candidate_token_lens": { "A": 2, "B": 2, "C": 2, "D": 2, "E": 2 }, "max_candidate_token_len": 2, "patch_windows": { "steps_0": [ 0 ], "steps_01": [ 0, 1 ], "full_steps": [ 0, 1 ], "note": "If steps_01 == full_steps then patched_01 == patched_full by design." }, "add_special_tokens_prompt": true, "seed": 123, "Qs_path": "results/subspace_patching_transfer/runs_layer10_seed123/Q_shared_layer10.npy", "Qs_shape": [ 4096, 97 ], "n_scanned": 254, "baseline_acc": 0.20866141732283464, "baseline_correct_n": 53, "ablated_acc": 0.2204724409448819, "ablated_correct_n": 56, "n_flips_total": 42, "n_flips_used": 42, "layers_path": "model.layers" }, "summary_on_flips": { "patched_0": { "n": 42, "rescued": 31, "rescued_pct": 73.80952380952381, "mean_dmargin": 3.3110272657303583, "median_dmargin": 3.0882368087768555 }, "patched_01": { "n": 42, "rescued": 42, "rescued_pct": 100.0, "mean_dmargin": 3.6945105280194963, "median_dmargin": 3.317805767059326 }, "patched_full": { "n": 42, "rescued": 42, "rescued_pct": 100.0, "mean_dmargin": 3.6945105280194963, "median_dmargin": 3.317805767059326 }, "control_rand_subspace": { "n": 42, "rescued": 2, "rescued_pct": 4.761904761904762, "mean_dmargin": 0.2848027887798491, "median_dmargin": 0.27750468254089355 }, "control_shared_randvec": { "n": 42, "rescued": 2, "rescued_pct": 4.761904761904762, "mean_dmargin": 0.38392406418209984, "median_dmargin": 0.4009871482849121 }, "control_time_shuffled": { "n": 42, "rescued": 32, "rescued_pct": 76.19047619047619, "mean_dmargin": 3.2988027050381614, "median_dmargin": 3.1241049766540527 }, "control_patch_nonshared": { "n": 42, "rescued": 0, "rescued_pct": 0.0, "mean_dmargin": 9.08261253720238e-08, "median_dmargin": 0.0 } }, "scan_rows": [ { "ex_id": "aqua-test-0", "gold": "B", "baseline": { "pred_label": "C", "correct": false, "margin": -0.5365619659423828, "scores": { "A": -9.953326225280762, "B": -9.932822227478027, "C": -9.396260261535645, "D": -11.750316619873047, "E": -11.375755310058594 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.246011734008789, "scores": { "A": -9.17184829711914, "B": -10.41786003112793, "C": -11.056268692016602, "D": -11.206304550170898, "E": -11.109382629394531 } } }, { "ex_id": "aqua-test-1", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -0.03298187255859375, "scores": { "A": -12.888943672180176, "B": -11.506059646606445, "C": -11.539041519165039, "D": -13.514416694641113, "E": -13.277742385864258 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -5.448361873626709, "scores": { "A": -6.55993127822876, "B": -11.09766960144043, "C": -12.008293151855469, "D": -11.017435073852539, "E": -12.980535507202148 } } }, { "ex_id": "aqua-test-2", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.2555389404296875, "scores": { "A": -11.233211517333984, "B": -10.210750579833984, "C": -13.17569351196289, "D": -12.437894821166992, "E": -10.466289520263672 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -7.949008941650391, "scores": { "A": -6.06699275970459, "B": -14.01600170135498, "C": -17.137845993041992, "D": -15.27363109588623, "E": -15.64785099029541 } } }, { "ex_id": "aqua-test-3", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -1.1184234619140625, "scores": { "A": -12.673524856567383, "B": -8.500896453857422, "C": -9.619319915771484, "D": -12.860542297363281, "E": -15.163476943969727 } }, "ablated": { "pred_label": "B", "correct": false, "margin": -1.0571308135986328, "scores": { "A": -9.761144638061523, "B": -9.180427551269531, "C": -10.237558364868164, "D": -11.047746658325195, "E": -10.9005126953125 } } }, { "ex_id": "aqua-test-4", "gold": "A", "baseline": { "pred_label": "A", "correct": true, "margin": 0.8914222717285156, "scores": { "A": -10.94589900970459, "B": -11.837321281433105, "C": -13.137775421142578, "D": -12.691411972045898, "E": -12.272680282592773 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 1.8114051818847656, "scores": { "A": -8.195756912231445, "B": -10.760305404663086, "C": -11.914056777954102, "D": -10.007162094116211, "E": -10.895750045776367 } } }, { "ex_id": "aqua-test-5", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.953394889831543, "scores": { "A": -11.989723205566406, "B": -10.97428035736084, "C": -12.035185813903809, "D": -11.961091041564941, "E": -11.927675247192383 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.1877222061157227, "scores": { "A": -7.596570014953613, "B": -9.784292221069336, "C": -11.036355018615723, "D": -9.200647354125977, "E": -10.078826904296875 } } }, { "ex_id": "aqua-test-6", "gold": "C", "baseline": { "pred_label": "A", "correct": false, "margin": -2.3978919982910156, "scores": { "A": -10.834028244018555, "B": -12.190977096557617, "C": -13.23192024230957, "D": -14.303913116455078, "E": -12.20677375793457 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -5.510491371154785, "scores": { "A": -7.008818626403809, "B": -9.866249084472656, "C": -12.519309997558594, "D": -11.021146774291992, "E": -11.009967803955078 } } }, { "ex_id": "aqua-test-7", "gold": "D", "baseline": { "pred_label": "C", "correct": false, "margin": -2.163623809814453, "scores": { "A": -10.552425384521484, "B": -9.352998733520508, "C": -8.957988739013672, "D": -11.121612548828125, "E": -10.957361221313477 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.8754777908325195, "scores": { "A": -6.72020149230957, "B": -10.692610740661621, "C": -10.54880428314209, "D": -10.59567928314209, "E": -12.170318603515625 } } }, { "ex_id": "aqua-test-8", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -2.255800247192383, "scores": { "A": -13.03713607788086, "B": -11.900215148925781, "C": -14.156015396118164, "D": -12.385900497436523, "E": -14.474089622497559 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -8.033926010131836, "scores": { "A": -6.845177173614502, "B": -11.928691864013672, "C": -14.87910270690918, "D": -11.124820709228516, "E": -12.958259582519531 } } }, { "ex_id": "aqua-test-9", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 1.0833330154418945, "scores": { "A": -11.261035919189453, "B": -8.873366355895996, "C": -9.95669937133789, "D": -12.33233642578125, "E": -13.964797973632812 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.0129852294921875, "scores": { "A": -7.305376052856445, "B": -11.318361282348633, "C": -11.48718547821045, "D": -13.66738224029541, "E": -15.269938468933105 } } }, { "ex_id": "aqua-test-10", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -0.20740604400634766, "scores": { "A": -11.915353775024414, "B": -10.265600204467773, "C": -13.313862800598145, "D": -11.45443344116211, "E": -10.473006248474121 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -7.0987701416015625, "scores": { "A": -7.162988662719727, "B": -11.00853157043457, "C": -14.745489120483398, "D": -12.574932098388672, "E": -14.261758804321289 } } }, { "ex_id": "aqua-test-11", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -2.032149314880371, "scores": { "A": -11.705740928649902, "B": -10.383201599121094, "C": -12.561548233032227, "D": -12.598165512084961, "E": -12.415350914001465 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.6796650886535645, "scores": { "A": -6.977108478546143, "B": -10.32332992553711, "C": -13.413308143615723, "D": -11.12005615234375, "E": -13.656773567199707 } } }, { "ex_id": "aqua-test-12", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -2.4920692443847656, "scores": { "A": -12.802289962768555, "B": -9.024707794189453, "C": -11.516777038574219, "D": -11.572513580322266, "E": -13.012077331542969 } }, "ablated": { "pred_label": "D", "correct": false, "margin": -3.181248664855957, "scores": { "A": -8.80724048614502, "B": -9.62839126586914, "C": -11.967851638793945, "D": -8.786602973937988, "E": -11.410276412963867 } } }, { "ex_id": "aqua-test-13", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -2.793048858642578, "scores": { "A": -12.858366012573242, "B": -9.46006965637207, "C": -9.54768180847168, "D": -12.253118515014648, "E": -12.870738983154297 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.821455955505371, "scores": { "A": -6.2456769943237305, "B": -9.579475402832031, "C": -8.059391021728516, "D": -10.067132949829102, "E": -13.029922485351562 } } }, { "ex_id": "aqua-test-14", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -1.6211061477661133, "scores": { "A": -12.599227905273438, "B": -9.088578224182129, "C": -10.55274772644043, "D": -10.709684371948242, "E": -9.514959335327148 } }, "ablated": { "pred_label": "D", "correct": true, "margin": 0.46077728271484375, "scores": { "A": -7.140524864196777, "B": -8.438863754272461, "C": -9.106466293334961, "D": -6.679747581481934, "E": -8.679572105407715 } } }, { "ex_id": "aqua-test-15", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.6604747772216797, "scores": { "A": -11.07632064819336, "B": -10.41584587097168, "C": -13.610551834106445, "D": -15.297096252441406, "E": -13.782489776611328 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.743229389190674, "scores": { "A": -6.1119704246521, "B": -10.855199813842773, "C": -11.251523971557617, "D": -11.053302764892578, "E": -13.566537857055664 } } }, { "ex_id": "aqua-test-16", "gold": "C", "baseline": { "pred_label": "C", "correct": true, "margin": 2.796067237854004, "scores": { "A": -12.479905128479004, "B": -10.507231712341309, "C": -7.711164474487305, "D": -12.827747344970703, "E": -12.807977676391602 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -0.861086368560791, "scores": { "A": -7.834758281707764, "B": -9.467061996459961, "C": -8.695844650268555, "D": -9.597942352294922, "E": -11.696287155151367 } } }, { "ex_id": "aqua-test-17", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -1.0560503005981445, "scores": { "A": -11.58172607421875, "B": -10.525675773620605, "C": -12.54155158996582, "D": -12.84415054321289, "E": -12.90414810180664 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 4.481626510620117, "scores": { "A": -7.197931289672852, "B": -12.279987335205078, "C": -14.135135650634766, "D": -11.679557800292969, "E": -15.510787963867188 } } }, { "ex_id": "aqua-test-18", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -1.3985824584960938, "scores": { "A": -11.516077041625977, "B": -10.966100692749023, "C": -13.956039428710938, "D": -12.364683151245117, "E": -12.156122207641602 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.602712631225586, "scores": { "A": -7.199901580810547, "B": -10.85714340209961, "C": -14.79636001586914, "D": -11.802614212036133, "E": -15.477705001831055 } } }, { "ex_id": "aqua-test-19", "gold": "A", "baseline": { "pred_label": "C", "correct": false, "margin": -0.7771091461181641, "scores": { "A": -9.50829792022705, "B": -9.048941612243652, "C": -8.731188774108887, "D": -10.535305976867676, "E": -11.122632026672363 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 3.3860559463500977, "scores": { "A": -5.540165901184082, "B": -9.812980651855469, "C": -8.92622184753418, "D": -10.782073974609375, "E": -11.889626502990723 } } }, { "ex_id": "aqua-test-20", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -1.8421554565429688, "scores": { "A": -10.2200345993042, "B": -9.71203899383545, "C": -9.828819274902344, "D": -11.333096504211426, "E": -11.554194450378418 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -7.804967880249023, "scores": { "A": -6.537824630737305, "B": -9.35693645477295, "C": -12.991019248962402, "D": -10.14554500579834, "E": -14.342792510986328 } } }, { "ex_id": "aqua-test-21", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.7691888809204102, "scores": { "A": -10.39490795135498, "B": -9.62571907043457, "C": -12.538268089294434, "D": -12.220020294189453, "E": -11.351235389709473 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.5374608039855957, "scores": { "A": -6.129680156707764, "B": -9.66714096069336, "C": -10.800978660583496, "D": -10.67288875579834, "E": -11.187762260437012 } } }, { "ex_id": "aqua-test-22", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -2.974935531616211, "scores": { "A": -9.757966995239258, "B": -8.531810760498047, "C": -11.770942687988281, "D": -12.615520477294922, "E": -11.506746292114258 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -7.905245304107666, "scores": { "A": -6.365554332733154, "B": -10.006322860717773, "C": -12.58491325378418, "D": -13.247224807739258, "E": -14.27079963684082 } } }, { "ex_id": "aqua-test-23", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -3.1230697631835938, "scores": { "A": -9.921218872070312, "B": -8.795772552490234, "C": -10.068075180053711, "D": -11.918842315673828, "E": -10.69045639038086 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.665317535400391, "scores": { "A": -6.0819854736328125, "B": -10.304386138916016, "C": -12.034563064575195, "D": -12.747303009033203, "E": -13.464553833007812 } } }, { "ex_id": "aqua-test-24", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -0.5330438613891602, "scores": { "A": -11.630638122558594, "B": -10.715802192687988, "C": -13.269601821899414, "D": -11.420013427734375, "E": -11.248846054077148 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -0.1090230941772461, "scores": { "A": -7.273715019226074, "B": -7.602567672729492, "C": -9.413043975830078, "D": -7.838529586791992, "E": -7.38273811340332 } } }, { "ex_id": "aqua-test-25", "gold": "C", "baseline": { "pred_label": "C", "correct": true, "margin": 0.06520843505859375, "scores": { "A": -12.949111938476562, "B": -12.246522903442383, "C": -12.181314468383789, "D": -12.397541046142578, "E": -13.614669799804688 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.5791339874267578, "scores": { "A": -8.643856048583984, "B": -10.894746780395508, "C": -10.222990036010742, "D": -9.472063064575195, "E": -10.86764144897461 } } }, { "ex_id": "aqua-test-26", "gold": "A", "baseline": { "pred_label": "C", "correct": false, "margin": -3.831747055053711, "scores": { "A": -12.59067440032959, "B": -10.750125885009766, "C": -8.758927345275879, "D": -12.953798294067383, "E": -10.869454383850098 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 0.6270813941955566, "scores": { "A": -7.691537380218506, "B": -10.792871475219727, "C": -8.318618774414062, "D": -9.645444869995117, "E": -11.4267578125 } } }, { "ex_id": "aqua-test-27", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -2.8056774139404297, "scores": { "A": -10.92805290222168, "B": -9.627052307128906, "C": -10.676057815551758, "D": -12.432729721069336, "E": -12.006237030029297 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.3288869857788086, "scores": { "A": -8.168229103088379, "B": -10.158320426940918, "C": -11.378348350524902, "D": -10.497116088867188, "E": -11.294864654541016 } } }, { "ex_id": "aqua-test-28", "gold": "A", "baseline": { "pred_label": "A", "correct": true, "margin": 1.3396778106689453, "scores": { "A": -10.445246696472168, "B": -11.784924507141113, "C": -12.745423316955566, "D": -13.060553550720215, "E": -13.08572769165039 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 4.244240760803223, "scores": { "A": -9.513802528381348, "B": -13.75804328918457, "C": -14.599481582641602, "D": -14.581493377685547, "E": -13.772315979003906 } } }, { "ex_id": "aqua-test-29", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -2.6846466064453125, "scores": { "A": -12.935235977172852, "B": -10.250589370727539, "C": -10.775646209716797, "D": -10.991680145263672, "E": -12.39478874206543 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 1.3767662048339844, "scores": { "A": -8.437541961669922, "B": -10.61314868927002, "C": -11.896113395690918, "D": -9.814308166503906, "E": -11.01830005645752 } } }, { "ex_id": "aqua-test-30", "gold": "D", "baseline": { "pred_label": "C", "correct": false, "margin": -4.338037490844727, "scores": { "A": -11.664570808410645, "B": -11.412707328796387, "C": -11.081539154052734, "D": -15.419576644897461, "E": -11.825617790222168 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.817980766296387, "scores": { "A": -6.548068046569824, "B": -12.580531120300293, "C": -14.352285385131836, "D": -13.366048812866211, "E": -12.250596046447754 } } }, { "ex_id": "aqua-test-31", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -0.5094156265258789, "scores": { "A": -10.960805892944336, "B": -9.841753005981445, "C": -10.351168632507324, "D": -10.83080768585205, "E": -11.36546802520752 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -5.294549942016602, "scores": { "A": -6.641495704650879, "B": -10.112987518310547, "C": -11.93604564666748, "D": -10.474628448486328, "E": -9.680524826049805 } } }, { "ex_id": "aqua-test-32", "gold": "B", "baseline": { "pred_label": "C", "correct": false, "margin": -0.4654970169067383, "scores": { "A": -12.72982406616211, "B": -10.999740600585938, "C": -10.5342435836792, "D": -12.491869926452637, "E": -11.609811782836914 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.625300884246826, "scores": { "A": -6.746448993682861, "B": -10.371749877929688, "C": -10.17892074584961, "D": -12.78900146484375, "E": -13.765708923339844 } } }, { "ex_id": "aqua-test-33", "gold": "C", "baseline": { "pred_label": "C", "correct": true, "margin": 1.2154502868652344, "scores": { "A": -17.279247283935547, "B": -18.187232971191406, "C": -16.063796997070312, "D": -19.143869400024414, "E": -19.470874786376953 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -0.4487724304199219, "scores": { "A": -9.145519256591797, "B": -10.157659530639648, "C": -9.594291687011719, "D": -10.095281600952148, "E": -10.523807525634766 } } }, { "ex_id": "aqua-test-34", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -2.934541702270508, "scores": { "A": -14.833627700805664, "B": -11.353015899658203, "C": -14.715003967285156, "D": -14.287557601928711, "E": -15.349594116210938 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -5.323911666870117, "scores": { "A": -9.540066719055176, "B": -11.480504035949707, "C": -14.825972557067871, "D": -14.863978385925293, "E": -18.370067596435547 } } }, { "ex_id": "aqua-test-35", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -1.0334539413452148, "scores": { "A": -11.940654754638672, "B": -10.587756156921387, "C": -11.621210098266602, "D": -11.252909660339355, "E": -11.599471092224121 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.0078792572021484, "scores": { "A": -8.075170516967773, "B": -10.238532066345215, "C": -10.083049774169922, "D": -9.30896282196045, "E": -9.410977363586426 } } }, { "ex_id": "aqua-test-36", "gold": "E", "baseline": { "pred_label": "A", "correct": false, "margin": -1.7479333877563477, "scores": { "A": -9.8831205368042, "B": -10.474870681762695, "C": -10.734328269958496, "D": -12.681618690490723, "E": -11.631053924560547 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.574769020080566, "scores": { "A": -8.31888198852539, "B": -12.751762390136719, "C": -12.679941177368164, "D": -14.572219848632812, "E": -14.893651008605957 } } }, { "ex_id": "aqua-test-37", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -0.9962596893310547, "scores": { "A": -11.241470336914062, "B": -10.501060485839844, "C": -13.315231323242188, "D": -12.760457992553711, "E": -11.497320175170898 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -0.8308868408203125, "scores": { "A": -9.542606353759766, "B": -10.720484733581543, "C": -12.237710952758789, "D": -10.93893814086914, "E": -10.373493194580078 } } }, { "ex_id": "aqua-test-38", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -1.5876941680908203, "scores": { "A": -13.907208442687988, "B": -12.088470458984375, "C": -17.00652313232422, "D": -16.789785385131836, "E": -13.676164627075195 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.61981201171875, "scores": { "A": -8.97873306274414, "B": -11.468414306640625, "C": -14.408849716186523, "D": -12.497419357299805, "E": -12.59854507446289 } } }, { "ex_id": "aqua-test-39", "gold": "A", "baseline": { "pred_label": "A", "correct": true, "margin": 1.704728126525879, "scores": { "A": -10.207995414733887, "B": -11.912723541259766, "C": -12.109935760498047, "D": -14.276583671569824, "E": -13.992156982421875 } }, "ablated": { "pred_label": "C", "correct": false, "margin": -0.4076976776123047, "scores": { "A": -10.194977760314941, "B": -10.153923988342285, "C": -9.787280082702637, "D": -11.554168701171875, "E": -10.806174278259277 } } }, { "ex_id": "aqua-test-40", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -1.1876583099365234, "scores": { "A": -9.934663772583008, "B": -8.747005462646484, "C": -8.951501846313477, "D": -11.43099308013916, "E": -10.340058326721191 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 2.8720083236694336, "scores": { "A": -6.977086067199707, "B": -10.881498336791992, "C": -10.488456726074219, "D": -9.84909439086914, "E": -9.965597152709961 } } }, { "ex_id": "aqua-test-41", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -3.4858741760253906, "scores": { "A": -13.249530792236328, "B": -12.764188766479492, "C": -14.671175003051758, "D": -16.250062942504883, "E": -16.50116539001465 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.068471908569336, "scores": { "A": -8.645910263061523, "B": -9.370107650756836, "C": -10.64356803894043, "D": -10.71438217163086, "E": -10.385175704956055 } } }, { "ex_id": "aqua-test-42", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -0.9857778549194336, "scores": { "A": -14.824063301086426, "B": -12.47767448425293, "C": -12.883535385131836, "D": -13.463452339172363, "E": -14.847708702087402 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.4033865928649902, "scores": { "A": -6.698444843292236, "B": -11.148558616638184, "C": -12.872434616088867, "D": -10.101831436157227, "E": -12.350337982177734 } } }, { "ex_id": "aqua-test-43", "gold": "D", "baseline": { "pred_label": "C", "correct": false, "margin": -0.739567756652832, "scores": { "A": -12.141595840454102, "B": -11.716957092285156, "C": -11.391490936279297, "D": -12.131058692932129, "E": -13.988408088684082 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.8107433319091797, "scores": { "A": -5.002326965332031, "B": -7.882379531860352, "C": -9.065218925476074, "D": -6.813070297241211, "E": -9.598858833312988 } } }, { "ex_id": "aqua-test-44", "gold": "A", "baseline": { "pred_label": "A", "correct": true, "margin": 0.3987007141113281, "scores": { "A": -10.112314224243164, "B": -10.511014938354492, "C": -10.519290924072266, "D": -12.189737319946289, "E": -15.004023551940918 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 3.4644508361816406, "scores": { "A": -7.429119110107422, "B": -11.428091049194336, "C": -14.222383499145508, "D": -10.893569946289062, "E": -14.403785705566406 } } }, { "ex_id": "aqua-test-45", "gold": "A", "baseline": { "pred_label": "C", "correct": false, "margin": -1.0466318130493164, "scores": { "A": -12.969017028808594, "B": -12.31945514678955, "C": -11.922385215759277, "D": -12.34321403503418, "E": -12.47985553741455 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 2.317699909210205, "scores": { "A": -7.05304479598999, "B": -9.370744705200195, "C": -10.5771484375, "D": -9.985260009765625, "E": -12.94332504272461 } } }, { "ex_id": "aqua-test-46", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -0.7821559906005859, "scores": { "A": -12.967859268188477, "B": -11.404945373535156, "C": -11.63718032836914, "D": -11.908271789550781, "E": -12.187101364135742 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.7385272979736328, "scores": { "A": -7.029201507568359, "B": -8.36732292175293, "C": -10.496192932128906, "D": -8.434120178222656, "E": -8.767728805541992 } } }, { "ex_id": "aqua-test-47", "gold": "E", "baseline": { "pred_label": "E", "correct": true, "margin": 0.20550537109375, "scores": { "A": -11.954267501831055, "B": -12.503751754760742, "C": -12.114371299743652, "D": -13.045472145080566, "E": -11.748762130737305 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.0157623291015625, "scores": { "A": -9.386420249938965, "B": -11.835212707519531, "C": -13.338075637817383, "D": -12.148918151855469, "E": -13.402182579040527 } } }, { "ex_id": "aqua-test-48", "gold": "E", "baseline": { "pred_label": "C", "correct": false, "margin": -2.2838096618652344, "scores": { "A": -14.12697982788086, "B": -14.659561157226562, "C": -9.08004379272461, "D": -10.326372146606445, "E": -11.363853454589844 } }, "ablated": { "pred_label": "C", "correct": false, "margin": -1.4984474182128906, "scores": { "A": -7.6805315017700195, "B": -8.929984092712402, "C": -6.419025421142578, "D": -7.822979927062988, "E": -7.917472839355469 } } }, { "ex_id": "aqua-test-49", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -4.129861831665039, "scores": { "A": -15.956474304199219, "B": -11.82661247253418, "C": -14.917438507080078, "D": -13.226446151733398, "E": -13.977205276489258 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 1.0888185501098633, "scores": { "A": -8.945561408996582, "B": -10.034379959106445, "C": -13.530162811279297, "D": -10.250844955444336, "E": -12.504024505615234 } } }, { "ex_id": "aqua-test-50", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -2.279160499572754, "scores": { "A": -9.496601104736328, "B": -8.565024375915527, "C": -9.976577758789062, "D": -10.844184875488281, "E": -10.707534790039062 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.749849319458008, "scores": { "A": -7.051667213439941, "B": -10.064657211303711, "C": -12.560236930847168, "D": -10.80151653289795, "E": -13.33292007446289 } } }, { "ex_id": "aqua-test-51", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -3.252643585205078, "scores": { "A": -11.461423873901367, "B": -9.536659240722656, "C": -9.648260116577148, "D": -12.789302825927734, "E": -11.993169784545898 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -5.026073455810547, "scores": { "A": -5.237083435058594, "B": -8.581623077392578, "C": -12.21021842956543, "D": -10.26315689086914, "E": -11.514408111572266 } } }, { "ex_id": "aqua-test-52", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.04578971862792969, "scores": { "A": -12.882274627685547, "B": -9.855215072631836, "C": -9.901004791259766, "D": -11.499755859375, "E": -10.678110122680664 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -0.4922494888305664, "scores": { "A": -5.136632442474365, "B": -5.628881931304932, "C": -6.605200290679932, "D": -6.88695764541626, "E": -6.429419994354248 } } }, { "ex_id": "aqua-test-53", "gold": "D", "baseline": { "pred_label": "C", "correct": false, "margin": -4.019079208374023, "scores": { "A": -12.729389190673828, "B": -12.427694320678711, "C": -8.400447845458984, "D": -12.419527053833008, "E": -13.457754135131836 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.768967628479004, "scores": { "A": -4.923480033874512, "B": -8.778578758239746, "C": -8.191584587097168, "D": -9.692447662353516, "E": -10.092605590820312 } } }, { "ex_id": "aqua-test-54", "gold": "D", "baseline": { "pred_label": "C", "correct": false, "margin": -2.2229089736938477, "scores": { "A": -12.411125183105469, "B": -12.760860443115234, "C": -11.302736282348633, "D": -13.52564525604248, "E": -11.65049934387207 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -5.5538482666015625, "scores": { "A": -7.524580955505371, "B": -12.95750904083252, "C": -11.718106269836426, "D": -13.078429222106934, "E": -12.335714340209961 } } }, { "ex_id": "aqua-test-55", "gold": "B", "baseline": { "pred_label": "C", "correct": false, "margin": -0.8632240295410156, "scores": { "A": -10.496152877807617, "B": -10.564685821533203, "C": -9.701461791992188, "D": -13.170589447021484, "E": -11.492547988891602 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.287240982055664, "scores": { "A": -8.019121170043945, "B": -10.30636215209961, "C": -11.232714653015137, "D": -11.299230575561523, "E": -13.430822372436523 } } }, { "ex_id": "aqua-test-56", "gold": "D", "baseline": { "pred_label": "E", "correct": false, "margin": -1.5651264190673828, "scores": { "A": -10.601175308227539, "B": -11.313573837280273, "C": -12.363874435424805, "D": -12.14034652709961, "E": -10.575220108032227 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.2690534591674805, "scores": { "A": -7.066210746765137, "B": -8.967557907104492, "C": -10.526098251342773, "D": -8.335264205932617, "E": -9.71631145477295 } } }, { "ex_id": "aqua-test-57", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.14557647705078125, "scores": { "A": -13.898555755615234, "B": -12.992910385131836, "C": -14.371723175048828, "D": -14.158893585205078, "E": -13.138486862182617 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.6548147201538086, "scores": { "A": -10.324930191040039, "B": -12.979744911193848, "C": -12.848653793334961, "D": -12.86312484741211, "E": -12.547582626342773 } } }, { "ex_id": "aqua-test-58", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -3.41909122467041, "scores": { "A": -15.519378662109375, "B": -10.206266403198242, "C": -13.625357627868652, "D": -15.576879501342773, "E": -14.738330841064453 } }, "ablated": { "pred_label": "B", "correct": false, "margin": -6.554704189300537, "scores": { "A": -10.922317504882812, "B": -7.338093280792236, "C": -13.892797470092773, "D": -11.01749038696289, "E": -13.093095779418945 } } }, { "ex_id": "aqua-test-59", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -1.0286178588867188, "scores": { "A": -9.556885719299316, "B": -8.528267860412598, "C": -9.387777328491211, "D": -11.924543380737305, "E": -12.156147003173828 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 2.6055479049682617, "scores": { "A": -7.506214141845703, "B": -10.111762046813965, "C": -10.535852432250977, "D": -12.113842010498047, "E": -13.108339309692383 } } }, { "ex_id": "aqua-test-60", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -4.278145790100098, "scores": { "A": -9.984968185424805, "B": -9.964075088500977, "C": -10.470348358154297, "D": -14.242220878601074, "E": -14.801360130310059 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.694246292114258, "scores": { "A": -5.662052154541016, "B": -9.853955268859863, "C": -11.739667892456055, "D": -12.356298446655273, "E": -13.508790016174316 } } }, { "ex_id": "aqua-test-61", "gold": "E", "baseline": { "pred_label": "A", "correct": false, "margin": -1.4795856475830078, "scores": { "A": -11.702659606933594, "B": -13.152563095092773, "C": -15.516944885253906, "D": -12.736021041870117, "E": -13.182245254516602 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.2732534408569336, "scores": { "A": -7.3774213790893555, "B": -8.904997825622559, "C": -12.49362564086914, "D": -8.682700157165527, "E": -9.650674819946289 } } }, { "ex_id": "aqua-test-62", "gold": "A", "baseline": { "pred_label": "C", "correct": false, "margin": -4.083756446838379, "scores": { "A": -12.819320678710938, "B": -9.86478042602539, "C": -8.735564231872559, "D": -13.259029388427734, "E": -14.102011680603027 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 2.9089107513427734, "scores": { "A": -5.8731231689453125, "B": -8.782033920288086, "C": -8.930816650390625, "D": -11.697149276733398, "E": -13.882165908813477 } } }, { "ex_id": "aqua-test-63", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -1.828573226928711, "scores": { "A": -12.999687194824219, "B": -11.171113967895508, "C": -12.38466739654541, "D": -15.068181991577148, "E": -14.821438789367676 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 6.530241966247559, "scores": { "A": -9.71985149383545, "B": -16.808002471923828, "C": -17.539220809936523, "D": -16.250093460083008, "E": -17.91951560974121 } } }, { "ex_id": "aqua-test-64", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -0.11572933197021484, "scores": { "A": -11.253127098083496, "B": -9.855234146118164, "C": -9.970963478088379, "D": -11.471985816955566, "E": -13.291877746582031 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.368386745452881, "scores": { "A": -7.864134311676025, "B": -8.514406204223633, "C": -10.232521057128906, "D": -9.923612594604492, "E": -10.108715057373047 } } }, { "ex_id": "aqua-test-65", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -0.8041362762451172, "scores": { "A": -11.474063873291016, "B": -10.669927597045898, "C": -11.573220252990723, "D": -11.306943893432617, "E": -12.886905670166016 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 2.3487911224365234, "scores": { "A": -8.487371444702148, "B": -10.836162567138672, "C": -13.451092720031738, "D": -12.694389343261719, "E": -12.682896614074707 } } }, { "ex_id": "aqua-test-66", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -2.141798973083496, "scores": { "A": -12.508940696716309, "B": -10.861954689025879, "C": -12.234925270080566, "D": -11.884855270385742, "E": -13.003753662109375 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.2710695266723633, "scores": { "A": -8.62102222442627, "B": -10.894733428955078, "C": -14.39864730834961, "D": -10.672046661376953, "E": -11.892091751098633 } } }, { "ex_id": "aqua-test-67", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -1.341836929321289, "scores": { "A": -8.820962905883789, "B": -8.544965744018555, "C": -9.699121475219727, "D": -9.886802673339844, "E": -10.276521682739258 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.691999435424805, "scores": { "A": -6.632804870605469, "B": -11.282808303833008, "C": -13.191905975341797, "D": -11.324804306030273, "E": -13.604455947875977 } } }, { "ex_id": "aqua-test-68", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.6945219039916992, "scores": { "A": -11.65401840209961, "B": -10.95949649810791, "C": -11.869510650634766, "D": -12.070514678955078, "E": -12.618841171264648 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.171473503112793, "scores": { "A": -9.029966354370117, "B": -11.20143985748291, "C": -11.244144439697266, "D": -11.500038146972656, "E": -10.598958015441895 } } }, { "ex_id": "aqua-test-69", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -0.48809242248535156, "scores": { "A": -12.742959976196289, "B": -10.583757400512695, "C": -11.071849822998047, "D": -14.073648452758789, "E": -13.576339721679688 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.911890983581543, "scores": { "A": -6.22026252746582, "B": -9.122340202331543, "C": -13.132153511047363, "D": -11.907660484313965, "E": -11.635204315185547 } } }, { "ex_id": "aqua-test-70", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -3.720950126647949, "scores": { "A": -12.35704231262207, "B": -8.636092185974121, "C": -13.832864761352539, "D": -10.753250122070312, "E": -13.392253875732422 } }, "ablated": { "pred_label": "B", "correct": false, "margin": -0.03596019744873047, "scores": { "A": -7.090466499328613, "B": -7.054506301879883, "C": -11.16434097290039, "D": -7.422432899475098, "E": -10.723372459411621 } } }, { "ex_id": "aqua-test-71", "gold": "A", "baseline": { "pred_label": "C", "correct": false, "margin": -3.9103527069091797, "scores": { "A": -14.14399528503418, "B": -15.403404235839844, "C": -10.233642578125, "D": -13.802553176879883, "E": -11.533794403076172 } }, "ablated": { "pred_label": "E", "correct": false, "margin": -0.41689586639404297, "scores": { "A": -8.586220741271973, "B": -12.792287826538086, "C": -9.270968437194824, "D": -9.66348648071289, "E": -8.16932487487793 } } }, { "ex_id": "aqua-test-72", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -1.3486766815185547, "scores": { "A": -14.158398628234863, "B": -12.339851379394531, "C": -13.015729904174805, "D": -13.451315879821777, "E": -13.688528060913086 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.4561805725097656, "scores": { "A": -8.697513580322266, "B": -9.179859161376953, "C": -10.927139282226562, "D": -9.569753646850586, "E": -10.153694152832031 } } }, { "ex_id": "aqua-test-73", "gold": "E", "baseline": { "pred_label": "C", "correct": false, "margin": -6.728538513183594, "scores": { "A": -15.75730037689209, "B": -13.720550537109375, "C": -10.241283416748047, "D": -14.861220359802246, "E": -16.96982192993164 } }, "ablated": { "pred_label": "C", "correct": false, "margin": -4.603492736816406, "scores": { "A": -7.970151901245117, "B": -8.63032341003418, "C": -5.910724639892578, "D": -10.074857711791992, "E": -10.514217376708984 } } }, { "ex_id": "aqua-test-74", "gold": "A", "baseline": { "pred_label": "C", "correct": false, "margin": -1.4039888381958008, "scores": { "A": -12.737687110900879, "B": -11.68657112121582, "C": -11.333698272705078, "D": -13.284832954406738, "E": -14.690400123596191 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 0.2767963409423828, "scores": { "A": -7.744673728942871, "B": -9.163191795349121, "C": -8.021470069885254, "D": -8.26796817779541, "E": -8.809639930725098 } } }, { "ex_id": "aqua-test-75", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -3.457437515258789, "scores": { "A": -12.093403816223145, "B": -8.635966300964355, "C": -10.58320140838623, "D": -12.374037742614746, "E": -13.680496215820312 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 4.63665771484375, "scores": { "A": -6.7574872970581055, "B": -11.394145011901855, "C": -12.999401092529297, "D": -11.796443939208984, "E": -13.318641662597656 } } }, { "ex_id": "aqua-test-76", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -0.5669078826904297, "scores": { "A": -13.41685676574707, "B": -11.04054069519043, "C": -11.60744857788086, "D": -15.16108512878418, "E": -14.487443923950195 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.9996509552001953, "scores": { "A": -10.679261207580566, "B": -12.45645523071289, "C": -12.678912162780762, "D": -12.86469554901123, "E": -14.037067413330078 } } }, { "ex_id": "aqua-test-77", "gold": "D", "baseline": { "pred_label": "C", "correct": false, "margin": -2.8945484161376953, "scores": { "A": -12.302556991577148, "B": -10.522138595581055, "C": -9.23642349243164, "D": -12.130971908569336, "E": -14.117457389831543 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -5.082538604736328, "scores": { "A": -6.24571418762207, "B": -8.398621559143066, "C": -11.599692344665527, "D": -11.328252792358398, "E": -12.078139305114746 } } }, { "ex_id": "aqua-test-78", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 3.0808143615722656, "scores": { "A": -12.794174194335938, "B": -8.323003768920898, "C": -11.403818130493164, "D": -13.768218994140625, "E": -13.847496032714844 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.647706031799316, "scores": { "A": -5.634004592895508, "B": -10.281710624694824, "C": -11.297346115112305, "D": -12.075166702270508, "E": -12.413890838623047 } } }, { "ex_id": "aqua-test-79", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -0.665989875793457, "scores": { "A": -11.933293342590332, "B": -11.267303466796875, "C": -12.420202255249023, "D": -11.657835006713867, "E": -12.877152442932129 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 1.082460880279541, "scores": { "A": -7.030532360076904, "B": -8.325664520263672, "C": -10.181509017944336, "D": -8.112993240356445, "E": -9.957942962646484 } } }, { "ex_id": "aqua-test-80", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -1.5075702667236328, "scores": { "A": -10.707103729248047, "B": -9.199533462524414, "C": -10.327856063842773, "D": -10.43326187133789, "E": -11.281829833984375 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 1.147303581237793, "scores": { "A": -5.987746238708496, "B": -7.135049819946289, "C": -8.613941192626953, "D": -7.509088516235352, "E": -7.937631607055664 } } }, { "ex_id": "aqua-test-81", "gold": "E", "baseline": { "pred_label": "C", "correct": false, "margin": -2.7157487869262695, "scores": { "A": -12.188438415527344, "B": -11.99388599395752, "C": -10.619071006774902, "D": -14.215484619140625, "E": -13.334819793701172 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.729328155517578, "scores": { "A": -10.309041976928711, "B": -13.062793731689453, "C": -12.420219421386719, "D": -12.16856575012207, "E": -13.038370132446289 } } }, { "ex_id": "aqua-test-82", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -2.778050422668457, "scores": { "A": -12.289058685302734, "B": -9.830384254455566, "C": -12.608434677124023, "D": -13.991266250610352, "E": -11.78373908996582 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.322303771972656, "scores": { "A": -6.604994773864746, "B": -9.52199649810791, "C": -10.927298545837402, "D": -10.680765151977539, "E": -9.880135536193848 } } }, { "ex_id": "aqua-test-83", "gold": "D", "baseline": { "pred_label": "C", "correct": false, "margin": -1.9063081741333008, "scores": { "A": -10.316740036010742, "B": -9.180550575256348, "C": -8.257037162780762, "D": -10.163345336914062, "E": -9.424388885498047 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.0506300926208496, "scores": { "A": -5.58945894241333, "B": -7.652187824249268, "C": -9.419204711914062, "D": -8.64008903503418, "E": -10.067176818847656 } } }, { "ex_id": "aqua-test-84", "gold": "A", "baseline": { "pred_label": "C", "correct": false, "margin": -2.371249198913574, "scores": { "A": -11.472586631774902, "B": -10.989583015441895, "C": -9.101337432861328, "D": -9.797515869140625, "E": -10.398811340332031 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 1.0994462966918945, "scores": { "A": -9.702713012695312, "B": -11.570058822631836, "C": -11.636595726013184, "D": -10.802159309387207, "E": -11.598857879638672 } } }, { "ex_id": "aqua-test-85", "gold": "A", "baseline": { "pred_label": "C", "correct": false, "margin": -2.0057201385498047, "scores": { "A": -11.025768280029297, "B": -9.936981201171875, "C": -9.020048141479492, "D": -13.46237564086914, "E": -13.570629119873047 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 3.6366686820983887, "scores": { "A": -6.244076251983643, "B": -10.921756744384766, "C": -9.880744934082031, "D": -11.773923873901367, "E": -14.056009292602539 } } }, { "ex_id": "aqua-test-86", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -1.7587852478027344, "scores": { "A": -10.227313995361328, "B": -8.468528747558594, "C": -10.537178039550781, "D": -12.026582717895508, "E": -11.197158813476562 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 1.7784309387207031, "scores": { "A": -9.244726181030273, "B": -11.023157119750977, "C": -12.277462005615234, "D": -12.193278312683105, "E": -11.235006332397461 } } }, { "ex_id": "aqua-test-87", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.6314544677734375, "scores": { "A": -9.793952941894531, "B": -9.162498474121094, "C": -11.231021881103516, "D": -12.002910614013672, "E": -11.467964172363281 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.9217519760131836, "scores": { "A": -6.585877418518066, "B": -9.50762939453125, "C": -9.712257385253906, "D": -9.212251663208008, "E": -11.261186599731445 } } }, { "ex_id": "aqua-test-88", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -1.4125795364379883, "scores": { "A": -14.69003963470459, "B": -12.324016571044922, "C": -13.061227798461914, "D": -13.971894264221191, "E": -13.73659610748291 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.924210071563721, "scores": { "A": -5.407630443572998, "B": -6.251180171966553, "C": -7.470930576324463, "D": -9.033124923706055, "E": -10.331840515136719 } } }, { "ex_id": "aqua-test-89", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -3.028618812561035, "scores": { "A": -10.8992919921875, "B": -7.870673179626465, "C": -10.651062965393066, "D": -12.425169944763184, "E": -11.295161247253418 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 1.749924659729004, "scores": { "A": -6.359846115112305, "B": -8.109770774841309, "C": -11.431536674499512, "D": -9.726787567138672, "E": -9.291827201843262 } } }, { "ex_id": "aqua-test-90", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -1.4867076873779297, "scores": { "A": -11.009735107421875, "B": -10.518085479736328, "C": -12.004793167114258, "D": -12.105035781860352, "E": -13.80916976928711 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.5990352630615234, "scores": { "A": -8.198083877563477, "B": -9.129544258117676, "C": -10.797119140625, "D": -9.568111419677734, "E": -10.094569206237793 } } }, { "ex_id": "aqua-test-91", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -1.3843660354614258, "scores": { "A": -12.006914138793945, "B": -10.0424222946167, "C": -11.426788330078125, "D": -12.127811431884766, "E": -10.665849685668945 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.6309232711791992, "scores": { "A": -8.031240463256836, "B": -8.961100578308105, "C": -9.662163734436035, "D": -9.757013320922852, "E": -9.014056205749512 } } }, { "ex_id": "aqua-test-92", "gold": "B", "baseline": { "pred_label": "A", "correct": false, "margin": -0.3693962097167969, "scores": { "A": -12.054543495178223, "B": -12.42393970489502, "C": -12.770564079284668, "D": -14.098543167114258, "E": -14.959080696105957 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.9901371002197266, "scores": { "A": -11.04039192199707, "B": -15.030529022216797, "C": -15.173776626586914, "D": -13.621156692504883, "E": -17.786663055419922 } } }, { "ex_id": "aqua-test-93", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -1.8483829498291016, "scores": { "A": -13.06054401397705, "B": -9.098247528076172, "C": -9.98631477355957, "D": -10.790071487426758, "E": -10.946630477905273 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.748597145080566, "scores": { "A": -8.24567699432373, "B": -12.128637313842773, "C": -15.724483489990234, "D": -13.707090377807617, "E": -14.994274139404297 } } }, { "ex_id": "aqua-test-94", "gold": "E", "baseline": { "pred_label": "C", "correct": false, "margin": -1.9227781295776367, "scores": { "A": -12.715982437133789, "B": -10.660414695739746, "C": -10.040125846862793, "D": -13.691793441772461, "E": -11.96290397644043 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -5.577579975128174, "scores": { "A": -5.7996954917907715, "B": -6.2772345542907715, "C": -9.271978378295898, "D": -10.277997970581055, "E": -11.377275466918945 } } }, { "ex_id": "aqua-test-95", "gold": "E", "baseline": { "pred_label": "C", "correct": false, "margin": -3.9412012100219727, "scores": { "A": -11.20505428314209, "B": -9.095925331115723, "C": -8.017539978027344, "D": -12.286678314208984, "E": -11.958741188049316 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.4480342864990234, "scores": { "A": -8.222542762756348, "B": -9.621024131774902, "C": -8.630133628845215, "D": -9.554940223693848, "E": -9.670577049255371 } } }, { "ex_id": "aqua-test-96", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -4.339990615844727, "scores": { "A": -9.821067810058594, "B": -9.126599311828613, "C": -13.028761863708496, "D": -11.429372787475586, "E": -13.46658992767334 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.383525371551514, "scores": { "A": -7.286087512969971, "B": -8.687753677368164, "C": -13.005938529968262, "D": -10.64708423614502, "E": -11.669612884521484 } } }, { "ex_id": "aqua-test-97", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -2.8383378982543945, "scores": { "A": -14.469042778015137, "B": -12.092732429504395, "C": -13.835532188415527, "D": -14.931070327758789, "E": -13.803962707519531 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.630073547363281, "scores": { "A": -10.096877098083496, "B": -10.369461059570312, "C": -13.923606872558594, "D": -14.726950645446777, "E": -14.135393142700195 } } }, { "ex_id": "aqua-test-98", "gold": "D", "baseline": { "pred_label": "C", "correct": false, "margin": -2.004549980163574, "scores": { "A": -13.00085735321045, "B": -10.797918319702148, "C": -10.5806884765625, "D": -12.585238456726074, "E": -11.885275840759277 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.416612148284912, "scores": { "A": -4.055731296539307, "B": -8.211648941040039, "C": -10.580713272094727, "D": -10.472343444824219, "E": -12.015127182006836 } } }, { "ex_id": "aqua-test-99", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -1.7006492614746094, "scores": { "A": -10.885625839233398, "B": -9.20606803894043, "C": -9.620462417602539, "D": -11.3240966796875, "E": -10.906717300415039 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -7.5970916748046875, "scores": { "A": -7.159132957458496, "B": -8.3507661819458, "C": -12.776918411254883, "D": -12.628029823303223, "E": -14.756224632263184 } } }, { "ex_id": "aqua-test-100", "gold": "C", "baseline": { "pred_label": "C", "correct": true, "margin": 0.1513805389404297, "scores": { "A": -9.272323608398438, "B": -9.739631652832031, "C": -9.120943069458008, "D": -10.063505172729492, "E": -10.608749389648438 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.166820049285889, "scores": { "A": -4.778280735015869, "B": -9.417329788208008, "C": -10.945100784301758, "D": -11.501747131347656, "E": -13.226821899414062 } } }, { "ex_id": "aqua-test-101", "gold": "A", "baseline": { "pred_label": "A", "correct": true, "margin": 0.3324604034423828, "scores": { "A": -10.100502014160156, "B": -10.432962417602539, "C": -11.973075866699219, "D": -10.604475021362305, "E": -12.458782196044922 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 4.506379127502441, "scores": { "A": -7.595697402954102, "B": -12.102076530456543, "C": -13.821539878845215, "D": -12.81662654876709, "E": -14.543049812316895 } } }, { "ex_id": "aqua-test-102", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -4.450368881225586, "scores": { "A": -11.315129280090332, "B": -10.445816993713379, "C": -10.68630599975586, "D": -14.12060832977295, "E": -14.896185874938965 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -7.9963579177856445, "scores": { "A": -6.833308219909668, "B": -10.518851280212402, "C": -13.344768524169922, "D": -13.985496520996094, "E": -14.829666137695312 } } }, { "ex_id": "aqua-test-103", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 1.1493282318115234, "scores": { "A": -9.748441696166992, "B": -8.529296875, "C": -9.693557739257812, "D": -11.449222564697266, "E": -9.678625106811523 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -0.7451763153076172, "scores": { "A": -7.868520736694336, "B": -8.613697052001953, "C": -10.544960975646973, "D": -9.806873321533203, "E": -8.439764022827148 } } }, { "ex_id": "aqua-test-104", "gold": "D", "baseline": { "pred_label": "C", "correct": false, "margin": -3.3600540161132812, "scores": { "A": -14.189347267150879, "B": -11.361625671386719, "C": -11.152084350585938, "D": -14.512138366699219, "E": -15.981123924255371 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.8950366973876953, "scores": { "A": -8.590035438537598, "B": -10.608055114746094, "C": -11.210797309875488, "D": -11.485072135925293, "E": -10.96902847290039 } } }, { "ex_id": "aqua-test-105", "gold": "C", "baseline": { "pred_label": "C", "correct": true, "margin": 3.0392799377441406, "scores": { "A": -11.515534400939941, "B": -12.032148361206055, "C": -8.4762544631958, "D": -13.967401504516602, "E": -13.267354011535645 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.133573055267334, "scores": { "A": -7.463276386260986, "B": -9.80911636352539, "C": -8.59684944152832, "D": -13.382390975952148, "E": -13.58960247039795 } } }, { "ex_id": "aqua-test-106", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -0.9239311218261719, "scores": { "A": -10.124608039855957, "B": -9.200676918029785, "C": -9.467672348022461, "D": -13.042096138000488, "E": -13.135705947875977 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 3.8698911666870117, "scores": { "A": -7.598464012145996, "B": -11.948047637939453, "C": -13.422207832336426, "D": -11.468355178833008, "E": -14.042037010192871 } } }, { "ex_id": "aqua-test-107", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -1.0206260681152344, "scores": { "A": -9.933467864990234, "B": -8.912841796875, "C": -11.042007446289062, "D": -12.203380584716797, "E": -10.170745849609375 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 3.180150032043457, "scores": { "A": -7.0703229904174805, "B": -10.250473022460938, "C": -11.684900283813477, "D": -10.878337860107422, "E": -11.952753067016602 } } }, { "ex_id": "aqua-test-108", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -2.651386260986328, "scores": { "A": -12.283191680908203, "B": -9.631805419921875, "C": -12.84640121459961, "D": -13.274940490722656, "E": -14.580394744873047 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 1.882678508758545, "scores": { "A": -6.49999475479126, "B": -8.382673263549805, "C": -11.06556510925293, "D": -10.006368637084961, "E": -11.96578598022461 } } }, { "ex_id": "aqua-test-109", "gold": "C", "baseline": { "pred_label": "E", "correct": false, "margin": -0.8047208786010742, "scores": { "A": -10.049991607666016, "B": -11.320069313049316, "C": -10.77479362487793, "D": -12.454825401306152, "E": -9.970072746276855 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.179561614990234, "scores": { "A": -8.94586181640625, "B": -13.091926574707031, "C": -15.125423431396484, "D": -13.694250106811523, "E": -14.551794052124023 } } }, { "ex_id": "aqua-test-110", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -3.271200180053711, "scores": { "A": -12.053251266479492, "B": -8.445196151733398, "C": -11.71639633178711, "D": -12.579341888427734, "E": -15.129302024841309 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -5.586641788482666, "scores": { "A": -5.685309886932373, "B": -9.068038940429688, "C": -11.271951675415039, "D": -11.380401611328125, "E": -13.35078239440918 } } }, { "ex_id": "aqua-test-111", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.18841552734375, "scores": { "A": -9.808207511901855, "B": -9.283623695373535, "C": -9.472039222717285, "D": -10.7572660446167, "E": -11.43770980834961 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.6287879943847656, "scores": { "A": -8.439443588256836, "B": -12.068231582641602, "C": -12.49129867553711, "D": -13.331933975219727, "E": -14.553701400756836 } } }, { "ex_id": "aqua-test-112", "gold": "A", "baseline": { "pred_label": "C", "correct": false, "margin": -5.095149040222168, "scores": { "A": -16.67582893371582, "B": -16.2126522064209, "C": -11.580679893493652, "D": -13.807619094848633, "E": -15.536310195922852 } }, "ablated": { "pred_label": "D", "correct": false, "margin": -0.09568214416503906, "scores": { "A": -10.995382308959961, "B": -13.306709289550781, "C": -11.021820068359375, "D": -10.899700164794922, "E": -15.755931854248047 } } }, { "ex_id": "aqua-test-113", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -0.3748960494995117, "scores": { "A": -10.411832809448242, "B": -9.715240478515625, "C": -10.090136528015137, "D": -12.844676971435547, "E": -11.264602661132812 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -7.242756366729736, "scores": { "A": -5.61425256729126, "B": -10.998117446899414, "C": -12.857008934020996, "D": -11.544221878051758, "E": -13.189793586730957 } } }, { "ex_id": "aqua-test-114", "gold": "B", "baseline": { "pred_label": "E", "correct": false, "margin": -0.20656394958496094, "scores": { "A": -12.522754669189453, "B": -11.953495025634766, "C": -12.375024795532227, "D": -12.647726058959961, "E": -11.746931076049805 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.894749641418457, "scores": { "A": -5.269627571105957, "B": -10.164377212524414, "C": -12.320079803466797, "D": -10.714139938354492, "E": -12.523801803588867 } } }, { "ex_id": "aqua-test-115", "gold": "E", "baseline": { "pred_label": "A", "correct": false, "margin": -1.2912511825561523, "scores": { "A": -10.948766708374023, "B": -11.498233795166016, "C": -11.3041410446167, "D": -12.34277629852295, "E": -12.240017890930176 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.3497333526611328, "scores": { "A": -9.884903907775879, "B": -12.706127166748047, "C": -13.208802223205566, "D": -11.332338333129883, "E": -11.234637260437012 } } }, { "ex_id": "aqua-test-116", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 1.7076244354248047, "scores": { "A": -12.037410736083984, "B": -9.75387191772461, "C": -11.461496353149414, "D": -11.536352157592773, "E": -11.817276000976562 } }, "ablated": { "pred_label": "D", "correct": false, "margin": -0.5289134979248047, "scores": { "A": -7.282122611999512, "B": -7.493680000305176, "C": -8.805983543395996, "D": -6.964766502380371, "E": -7.28157901763916 } } }, { "ex_id": "aqua-test-117", "gold": "E", "baseline": { "pred_label": "C", "correct": false, "margin": -2.203188896179199, "scores": { "A": -11.712003707885742, "B": -9.130005836486816, "C": -9.083451271057129, "D": -11.028267860412598, "E": -11.286640167236328 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.0263042449951172, "scores": { "A": -6.421114444732666, "B": -6.90539026260376, "C": -7.702053546905518, "D": -8.437675476074219, "E": -7.447418689727783 } } }, { "ex_id": "aqua-test-118", "gold": "C", "baseline": { "pred_label": "A", "correct": false, "margin": -0.34549522399902344, "scores": { "A": -12.712060928344727, "B": -14.21017074584961, "C": -13.05755615234375, "D": -14.676868438720703, "E": -13.82982063293457 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.252425193786621, "scores": { "A": -7.9384307861328125, "B": -10.416237831115723, "C": -10.190855979919434, "D": -11.566178321838379, "E": -11.377034187316895 } } }, { "ex_id": "aqua-test-119", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -1.894613265991211, "scores": { "A": -12.752466201782227, "B": -11.295127868652344, "C": -13.406665802001953, "D": -13.189741134643555, "E": -12.74017333984375 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.093064308166504, "scores": { "A": -6.806607246398926, "B": -7.607048988342285, "C": -10.043014526367188, "D": -10.89967155456543, "E": -11.805773735046387 } } }, { "ex_id": "aqua-test-120", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.3595123291015625, "scores": { "A": -12.646347045898438, "B": -10.183612823486328, "C": -10.54312515258789, "D": -11.979488372802734, "E": -12.640970230102539 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.8485918045043945, "scores": { "A": -7.463525772094727, "B": -12.312117576599121, "C": -11.753535270690918, "D": -12.008286476135254, "E": -13.767097473144531 } } }, { "ex_id": "aqua-test-121", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -2.6828231811523438, "scores": { "A": -12.19267463684082, "B": -10.822580337524414, "C": -11.00235366821289, "D": -13.505403518676758, "E": -13.309852600097656 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.824639797210693, "scores": { "A": -7.172897815704346, "B": -9.329679489135742, "C": -10.56558609008789, "D": -11.997537612915039, "E": -11.65449333190918 } } }, { "ex_id": "aqua-test-122", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.6598358154296875, "scores": { "A": -11.076019287109375, "B": -10.416183471679688, "C": -13.238750457763672, "D": -13.289159774780273, "E": -13.489381790161133 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.3248538970947266, "scores": { "A": -6.911991119384766, "B": -9.236845016479492, "C": -12.405698776245117, "D": -10.99496078491211, "E": -12.164006233215332 } } }, { "ex_id": "aqua-test-123", "gold": "C", "baseline": { "pred_label": "C", "correct": true, "margin": 1.928288459777832, "scores": { "A": -12.817946434020996, "B": -13.251622200012207, "C": -10.08199405670166, "D": -12.010282516479492, "E": -12.828923225402832 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.036839485168457, "scores": { "A": -7.727773666381836, "B": -10.925074577331543, "C": -11.764613151550293, "D": -11.528144836425781, "E": -13.928091049194336 } } }, { "ex_id": "aqua-test-124", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -0.9609298706054688, "scores": { "A": -10.947786331176758, "B": -9.929666519165039, "C": -10.890596389770508, "D": -11.183786392211914, "E": -11.429544448852539 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.408071517944336, "scores": { "A": -9.135915756225586, "B": -11.747968673706055, "C": -11.543987274169922, "D": -11.629928588867188, "E": -11.322809219360352 } } }, { "ex_id": "aqua-test-125", "gold": "C", "baseline": { "pred_label": "C", "correct": true, "margin": 0.26287078857421875, "scores": { "A": -12.876455307006836, "B": -12.006429672241211, "C": -10.34354305267334, "D": -10.606413841247559, "E": -11.505398750305176 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.1885986328125, "scores": { "A": -6.909121513366699, "B": -8.705928802490234, "C": -10.0977201461792, "D": -9.862305641174316, "E": -10.177146911621094 } } }, { "ex_id": "aqua-test-126", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -0.5388050079345703, "scores": { "A": -11.017732620239258, "B": -8.353882789611816, "C": -8.892687797546387, "D": -9.955700874328613, "E": -10.49584674835205 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -5.850264549255371, "scores": { "A": -5.791948318481445, "B": -10.630058288574219, "C": -11.642212867736816, "D": -11.66257381439209, "E": -12.981222152709961 } } }, { "ex_id": "aqua-test-127", "gold": "A", "baseline": { "pred_label": "D", "correct": false, "margin": -0.07229804992675781, "scores": { "A": -10.561637878417969, "B": -10.769161224365234, "C": -10.85409164428711, "D": -10.489339828491211, "E": -11.068832397460938 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 1.2087717056274414, "scores": { "A": -6.518125534057617, "B": -8.725174903869629, "C": -9.385905265808105, "D": -7.726897239685059, "E": -8.385092735290527 } } }, { "ex_id": "aqua-test-128", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -2.00350284576416, "scores": { "A": -10.787993431091309, "B": -10.3438081741333, "C": -10.841973304748535, "D": -13.455949783325195, "E": -12.347311019897461 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.958767890930176, "scores": { "A": -5.449042320251465, "B": -7.945782661437988, "C": -9.260028839111328, "D": -11.206633567810059, "E": -10.40781021118164 } } }, { "ex_id": "aqua-test-129", "gold": "D", "baseline": { "pred_label": "A", "correct": false, "margin": -2.607754707336426, "scores": { "A": -8.333622932434082, "B": -10.68683910369873, "C": -9.665505409240723, "D": -10.941377639770508, "E": -9.643619537353516 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.0246152877807617, "scores": { "A": -6.0440778732299805, "B": -8.230778694152832, "C": -7.669195175170898, "D": -8.068693161010742, "E": -8.261618614196777 } } }, { "ex_id": "aqua-test-130", "gold": "D", "baseline": { "pred_label": "D", "correct": true, "margin": 0.5736770629882812, "scores": { "A": -12.52768611907959, "B": -11.624752044677734, "C": -14.400633811950684, "D": -11.051074981689453, "E": -12.196588516235352 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -0.6399145126342773, "scores": { "A": -7.714714050292969, "B": -7.738489151000977, "C": -10.441914558410645, "D": -8.354628562927246, "E": -8.231303215026855 } } }, { "ex_id": "aqua-test-131", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 3.308675765991211, "scores": { "A": -14.739927291870117, "B": -9.621098518371582, "C": -12.929774284362793, "D": -13.892219543457031, "E": -14.483654022216797 } }, "ablated": { "pred_label": "B", "correct": true, "margin": 0.7574863433837891, "scores": { "A": -9.305915832519531, "B": -8.548429489135742, "C": -11.97828483581543, "D": -11.67667007446289, "E": -12.261627197265625 } } }, { "ex_id": "aqua-test-132", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -3.159423828125, "scores": { "A": -12.748001098632812, "B": -9.872476577758789, "C": -11.450910568237305, "D": -11.085186004638672, "E": -13.031900405883789 } }, "ablated": { "pred_label": "B", "correct": false, "margin": -1.519545555114746, "scores": { "A": -9.1354398727417, "B": -9.123946189880371, "C": -10.554903030395508, "D": -10.345840454101562, "E": -10.643491744995117 } } }, { "ex_id": "aqua-test-133", "gold": "A", "baseline": { "pred_label": "C", "correct": false, "margin": -4.092678070068359, "scores": { "A": -14.868802070617676, "B": -16.15050506591797, "C": -10.776124000549316, "D": -14.600775718688965, "E": -13.98430061340332 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 3.853121757507324, "scores": { "A": -8.56944465637207, "B": -13.000343322753906, "C": -12.422566413879395, "D": -14.54437255859375, "E": -14.450194358825684 } } }, { "ex_id": "aqua-test-134", "gold": "D", "baseline": { "pred_label": "A", "correct": false, "margin": -2.462491989135742, "scores": { "A": -10.72162914276123, "B": -11.912126541137695, "C": -13.152649879455566, "D": -13.184121131896973, "E": -13.222978591918945 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -5.007552146911621, "scores": { "A": -7.717267990112305, "B": -10.804107666015625, "C": -11.367389678955078, "D": -12.724820137023926, "E": -14.619938850402832 } } }, { "ex_id": "aqua-test-135", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -3.5100650787353516, "scores": { "A": -13.62440299987793, "B": -10.135202407836914, "C": -10.906095504760742, "D": -13.645267486572266, "E": -13.970268249511719 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.0002031326293945, "scores": { "A": -7.204651832580566, "B": -8.548638343811035, "C": -9.101480484008789, "D": -9.204854965209961, "E": -10.522067070007324 } } }, { "ex_id": "aqua-test-136", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -0.6944398880004883, "scores": { "A": -12.556368827819824, "B": -11.861928939819336, "C": -11.924398422241211, "D": -14.289663314819336, "E": -14.393033981323242 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 1.863565444946289, "scores": { "A": -10.153200149536133, "B": -12.455869674682617, "C": -12.468259811401367, "D": -12.016765594482422, "E": -13.061859130859375 } } }, { "ex_id": "aqua-test-137", "gold": "B", "baseline": { "pred_label": "C", "correct": false, "margin": -0.4276561737060547, "scores": { "A": -10.441576957702637, "B": -10.515706062316895, "C": -10.08804988861084, "D": -12.436307907104492, "E": -12.847251892089844 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.2915706634521484, "scores": { "A": -9.814286231994629, "B": -13.105856895446777, "C": -11.835171699523926, "D": -12.84805965423584, "E": -13.396150588989258 } } }, { "ex_id": "aqua-test-138", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -0.1457071304321289, "scores": { "A": -11.518792152404785, "B": -11.373085021972656, "C": -13.849699974060059, "D": -13.589049339294434, "E": -12.325687408447266 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 2.37286376953125, "scores": { "A": -9.340991973876953, "B": -11.942909240722656, "C": -13.375253677368164, "D": -12.332799911499023, "E": -11.713855743408203 } } }, { "ex_id": "aqua-test-139", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -4.983943939208984, "scores": { "A": -11.229677200317383, "B": -9.654775619506836, "C": -11.174234390258789, "D": -12.573564529418945, "E": -14.63871955871582 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.07908821105957, "scores": { "A": -7.586383819580078, "B": -9.389440536499023, "C": -10.293685913085938, "D": -9.784049987792969, "E": -11.665472030639648 } } }, { "ex_id": "aqua-test-140", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 1.090902328491211, "scores": { "A": -12.17054557800293, "B": -10.950679779052734, "C": -12.478940963745117, "D": -12.041582107543945, "E": -12.825494766235352 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.0806522369384766, "scores": { "A": -8.631109237670898, "B": -9.711761474609375, "C": -10.810302734375, "D": -10.214776992797852, "E": -11.603350639343262 } } }, { "ex_id": "aqua-test-141", "gold": "C", "baseline": { "pred_label": "C", "correct": true, "margin": 0.3972196578979492, "scores": { "A": -15.668845176696777, "B": -14.022212028503418, "C": -12.345376968383789, "D": -12.742596626281738, "E": -13.434144973754883 } }, "ablated": { "pred_label": "E", "correct": false, "margin": -1.4827747344970703, "scores": { "A": -9.032247543334961, "B": -10.177014350891113, "C": -9.580657005310059, "D": -8.116410255432129, "E": -8.097882270812988 } } }, { "ex_id": "aqua-test-142", "gold": "A", "baseline": { "pred_label": "A", "correct": true, "margin": 0.03481101989746094, "scores": { "A": -12.051063537597656, "B": -12.085874557495117, "C": -12.250658988952637, "D": -13.729873657226562, "E": -13.645383834838867 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 2.3378829956054688, "scores": { "A": -10.318166732788086, "B": -13.508720397949219, "C": -12.656049728393555, "D": -13.118291854858398, "E": -14.187196731567383 } } }, { "ex_id": "aqua-test-143", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -1.2146177291870117, "scores": { "A": -10.845283508300781, "B": -9.34599781036377, "C": -10.440536499023438, "D": -10.560615539550781, "E": -10.313671112060547 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.8671979904174805, "scores": { "A": -8.795687675476074, "B": -11.772283554077148, "C": -12.032180786132812, "D": -10.662885665893555, "E": -11.071569442749023 } } }, { "ex_id": "aqua-test-144", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -0.5195407867431641, "scores": { "A": -14.143760681152344, "B": -13.62421989440918, "C": -16.54352569580078, "D": -16.72017478942871, "E": -16.012075424194336 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 7.688299179077148, "scores": { "A": -6.442632675170898, "B": -14.130931854248047, "C": -17.20372772216797, "D": -14.199527740478516, "E": -15.404621124267578 } } }, { "ex_id": "aqua-test-145", "gold": "B", "baseline": { "pred_label": "E", "correct": false, "margin": -0.12565994262695312, "scores": { "A": -13.606302261352539, "B": -11.843841552734375, "C": -14.27863883972168, "D": -13.242870330810547, "E": -11.718181610107422 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.291046142578125, "scores": { "A": -8.400792121887207, "B": -11.691838264465332, "C": -15.319357872009277, "D": -13.33833122253418, "E": -12.860288619995117 } } }, { "ex_id": "aqua-test-146", "gold": "B", "baseline": { "pred_label": "C", "correct": false, "margin": -1.6924715042114258, "scores": { "A": -9.780816078186035, "B": -11.162944793701172, "C": -9.470473289489746, "D": -10.71984577178955, "E": -9.969797134399414 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.404109001159668, "scores": { "A": -8.36557388305664, "B": -10.769682884216309, "C": -14.039958000183105, "D": -13.555811882019043, "E": -13.034090042114258 } } }, { "ex_id": "aqua-test-147", "gold": "E", "baseline": { "pred_label": "A", "correct": false, "margin": -0.14206600189208984, "scores": { "A": -9.889412879943848, "B": -10.670077323913574, "C": -11.043986320495605, "D": -12.524433135986328, "E": -10.031478881835938 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.204798698425293, "scores": { "A": -6.384355545043945, "B": -8.133864402770996, "C": -9.067092895507812, "D": -10.208111763000488, "E": -8.589154243469238 } } }, { "ex_id": "aqua-test-148", "gold": "D", "baseline": { "pred_label": "D", "correct": true, "margin": 0.09283638000488281, "scores": { "A": -11.842838287353516, "B": -8.686580657958984, "C": -9.391075134277344, "D": -8.593744277954102, "E": -10.327585220336914 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -0.2917442321777344, "scores": { "A": -8.228094100952148, "B": -9.228675842285156, "C": -9.42142105102539, "D": -8.519838333129883, "E": -9.596782684326172 } } }, { "ex_id": "aqua-test-149", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -0.9827327728271484, "scores": { "A": -12.001296997070312, "B": -10.335747718811035, "C": -11.285538673400879, "D": -11.318480491638184, "E": -11.636820793151855 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.491743564605713, "scores": { "A": -7.410029888153076, "B": -10.250740051269531, "C": -13.93216323852539, "D": -13.901773452758789, "E": -15.188919067382812 } } }, { "ex_id": "aqua-test-150", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -2.9258947372436523, "scores": { "A": -12.320900917053223, "B": -9.475645065307617, "C": -10.984822273254395, "D": -12.40153980255127, "E": -12.635085105895996 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -5.376846790313721, "scores": { "A": -7.0111308097839355, "B": -12.037229537963867, "C": -12.431285858154297, "D": -12.387977600097656, "E": -13.794790267944336 } } }, { "ex_id": "aqua-test-151", "gold": "C", "baseline": { "pred_label": "E", "correct": false, "margin": -2.647052764892578, "scores": { "A": -15.152583122253418, "B": -13.6299467086792, "C": -14.575118064880371, "D": -15.285728454589844, "E": -11.928065299987793 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.1320362091064453, "scores": { "A": -10.271482467651367, "B": -11.43825912475586, "C": -13.403518676757812, "D": -11.501873016357422, "E": -12.473245620727539 } } }, { "ex_id": "aqua-test-152", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.6472129821777344, "scores": { "A": -12.141305923461914, "B": -11.08128833770752, "C": -11.728501319885254, "D": -11.744885444641113, "E": -11.734070777893066 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.606741905212402, "scores": { "A": -8.290619850158691, "B": -12.897361755371094, "C": -16.176721572875977, "D": -13.130666732788086, "E": -13.918773651123047 } } }, { "ex_id": "aqua-test-153", "gold": "A", "baseline": { "pred_label": "A", "correct": true, "margin": 0.4722251892089844, "scores": { "A": -10.484762191772461, "B": -10.956987380981445, "C": -12.194547653198242, "D": -15.127632141113281, "E": -13.331162452697754 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 3.3751373291015625, "scores": { "A": -5.1184186935424805, "B": -8.740020751953125, "C": -8.493556022644043, "D": -12.901175498962402, "E": -12.068525314331055 } } }, { "ex_id": "aqua-test-154", "gold": "D", "baseline": { "pred_label": "A", "correct": false, "margin": -4.98802375793457, "scores": { "A": -10.425346374511719, "B": -11.881086349487305, "C": -11.785425186157227, "D": -15.413370132446289, "E": -13.525296211242676 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.682590961456299, "scores": { "A": -6.576329708099365, "B": -9.645816802978516, "C": -10.998794555664062, "D": -11.258920669555664, "E": -11.089090347290039 } } }, { "ex_id": "aqua-test-155", "gold": "A", "baseline": { "pred_label": "C", "correct": false, "margin": -3.2458534240722656, "scores": { "A": -11.75346565246582, "B": -10.164717674255371, "C": -8.507612228393555, "D": -12.298287391662598, "E": -12.295981407165527 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 1.698258399963379, "scores": { "A": -6.5401506423950195, "B": -9.082632064819336, "C": -8.238409042358398, "D": -9.46942138671875, "E": -8.837421417236328 } } }, { "ex_id": "aqua-test-156", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -1.5458030700683594, "scores": { "A": -11.058704376220703, "B": -9.512901306152344, "C": -10.548510551452637, "D": -10.738350868225098, "E": -10.144469261169434 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 0.5494518280029297, "scores": { "A": -8.577381134033203, "B": -9.532999038696289, "C": -10.54125690460205, "D": -9.21225643157959, "E": -9.126832962036133 } } }, { "ex_id": "aqua-test-157", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -0.2852640151977539, "scores": { "A": -10.463203430175781, "B": -8.768805503845215, "C": -9.054069519042969, "D": -9.892763137817383, "E": -10.059773445129395 } }, "ablated": { "pred_label": "E", "correct": false, "margin": -1.2878742218017578, "scores": { "A": -7.138072967529297, "B": -8.380763053894043, "C": -8.266191482543945, "D": -7.350512504577637, "E": -6.9783172607421875 } } }, { "ex_id": "aqua-test-158", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -2.5956363677978516, "scores": { "A": -11.858783721923828, "B": -9.868914604187012, "C": -12.355928421020508, "D": -12.464550971984863, "E": -12.451033592224121 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -0.8353786468505859, "scores": { "A": -7.951072692871094, "B": -10.968667030334473, "C": -9.150461196899414, "D": -8.78645133972168, "E": -9.640289306640625 } } }, { "ex_id": "aqua-test-159", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -1.2088556289672852, "scores": { "A": -14.390352249145508, "B": -10.970995903015137, "C": -12.179851531982422, "D": -14.215005874633789, "E": -13.121991157531738 } }, "ablated": { "pred_label": "D", "correct": false, "margin": -1.5493526458740234, "scores": { "A": -10.103426933288574, "B": -10.092862129211426, "C": -11.540170669555664, "D": -9.99081802368164, "E": -10.121420860290527 } } }, { "ex_id": "aqua-test-160", "gold": "D", "baseline": { "pred_label": "A", "correct": false, "margin": -2.848499298095703, "scores": { "A": -11.598515510559082, "B": -14.197969436645508, "C": -14.457886695861816, "D": -14.447014808654785, "E": -14.36185073852539 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.696338653564453, "scores": { "A": -7.641029357910156, "B": -11.26103687286377, "C": -12.703668594360352, "D": -12.33736801147461, "E": -12.883567810058594 } } }, { "ex_id": "aqua-test-161", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -2.1676807403564453, "scores": { "A": -14.895027160644531, "B": -10.322026252746582, "C": -12.489706993103027, "D": -12.704346656799316, "E": -15.176275253295898 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.219618797302246, "scores": { "A": -6.856925964355469, "B": -7.009190559387207, "C": -9.076544761657715, "D": -9.654449462890625, "E": -9.462542533874512 } } }, { "ex_id": "aqua-test-162", "gold": "C", "baseline": { "pred_label": "A", "correct": false, "margin": -2.2371959686279297, "scores": { "A": -10.148950576782227, "B": -10.49891471862793, "C": -12.386146545410156, "D": -11.85212516784668, "E": -11.997817993164062 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.578624725341797, "scores": { "A": -5.560625076293945, "B": -10.614078521728516, "C": -10.139249801635742, "D": -10.14529800415039, "E": -11.632416725158691 } } }, { "ex_id": "aqua-test-163", "gold": "C", "baseline": { "pred_label": "A", "correct": false, "margin": -1.3345470428466797, "scores": { "A": -10.16016960144043, "B": -10.63470458984375, "C": -11.49471664428711, "D": -14.061302185058594, "E": -11.96687126159668 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.617689609527588, "scores": { "A": -7.696357250213623, "B": -11.909870147705078, "C": -12.314046859741211, "D": -12.379276275634766, "E": -10.673864364624023 } } }, { "ex_id": "aqua-test-164", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -3.0201797485351562, "scores": { "A": -11.76971435546875, "B": -11.384875297546387, "C": -12.587923049926758, "D": -14.405055046081543, "E": -13.626121520996094 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.4918289184570312, "scores": { "A": -10.035612106323242, "B": -13.309289932250977, "C": -14.361808776855469, "D": -12.527441024780273, "E": -13.649295806884766 } } }, { "ex_id": "aqua-test-165", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -1.3930606842041016, "scores": { "A": -13.838768005371094, "B": -11.385400772094727, "C": -14.192607879638672, "D": -12.778461456298828, "E": -14.867376327514648 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.0367560386657715, "scores": { "A": -7.127369403839111, "B": -8.74629020690918, "C": -13.14265251159668, "D": -13.164125442504883, "E": -14.882063865661621 } } }, { "ex_id": "aqua-test-166", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -2.7403507232666016, "scores": { "A": -13.24523639678955, "B": -8.922253608703613, "C": -9.740599632263184, "D": -10.831602096557617, "E": -11.662604331970215 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -5.432188510894775, "scores": { "A": -6.22821569442749, "B": -8.795976638793945, "C": -9.08587646484375, "D": -9.576181411743164, "E": -11.660404205322266 } } }, { "ex_id": "aqua-test-167", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 2.286722183227539, "scores": { "A": -13.319049835205078, "B": -10.63465690612793, "C": -12.921379089355469, "D": -16.10821533203125, "E": -14.74123764038086 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.3104257583618164, "scores": { "A": -8.05471420288086, "B": -11.365139961242676, "C": -15.134896278381348, "D": -13.336740493774414, "E": -14.394715309143066 } } }, { "ex_id": "aqua-test-168", "gold": "D", "baseline": { "pred_label": "A", "correct": false, "margin": -1.5851516723632812, "scores": { "A": -11.949223518371582, "B": -12.088781356811523, "C": -12.600192070007324, "D": -13.534375190734863, "E": -13.724043846130371 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.811002731323242, "scores": { "A": -8.285033226013184, "B": -12.87575912475586, "C": -14.61474609375, "D": -13.096035957336426, "E": -14.469371795654297 } } }, { "ex_id": "aqua-test-169", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -2.259394645690918, "scores": { "A": -11.700346946716309, "B": -9.44095230102539, "C": -11.634363174438477, "D": -13.524284362792969, "E": -12.442931175231934 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 1.1425189971923828, "scores": { "A": -9.171747207641602, "B": -10.314266204833984, "C": -10.48922348022461, "D": -11.292402267456055, "E": -11.195283889770508 } } }, { "ex_id": "aqua-test-170", "gold": "E", "baseline": { "pred_label": "A", "correct": false, "margin": -1.667557716369629, "scores": { "A": -12.687468528747559, "B": -13.540651321411133, "C": -15.73199462890625, "D": -14.172163009643555, "E": -14.355026245117188 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.3893675804138184, "scores": { "A": -7.45543909072876, "B": -11.622758865356445, "C": -12.694786071777344, "D": -10.333147048950195, "E": -10.844806671142578 } } }, { "ex_id": "aqua-test-171", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -1.3269481658935547, "scores": { "A": -12.739081382751465, "B": -11.41213321685791, "C": -12.104532241821289, "D": -14.289388656616211, "E": -13.22745132446289 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 3.89689302444458, "scores": { "A": -7.288093090057373, "B": -11.184986114501953, "C": -14.11172103881836, "D": -13.88416862487793, "E": -14.152048110961914 } } }, { "ex_id": "aqua-test-172", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -2.552183151245117, "scores": { "A": -11.899900436401367, "B": -9.34771728515625, "C": -9.6818265914917, "D": -12.592266082763672, "E": -11.358457565307617 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 0.2487936019897461, "scores": { "A": -7.7580461502075195, "B": -8.006839752197266, "C": -8.965506553649902, "D": -10.227289199829102, "E": -8.758523941040039 } } }, { "ex_id": "aqua-test-173", "gold": "D", "baseline": { "pred_label": "A", "correct": false, "margin": -4.587629318237305, "scores": { "A": -9.585987091064453, "B": -13.276374816894531, "C": -13.367696762084961, "D": -14.173616409301758, "E": -12.080738067626953 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -7.505217552185059, "scores": { "A": -7.101271629333496, "B": -12.202089309692383, "C": -14.173044204711914, "D": -14.606489181518555, "E": -14.75442123413086 } } }, { "ex_id": "aqua-test-174", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -0.24216461181640625, "scores": { "A": -14.199091911315918, "B": -13.47407054901123, "C": -14.678143501281738, "D": -13.550527572631836, "E": -13.716235160827637 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -5.9764862060546875, "scores": { "A": -8.514163970947266, "B": -11.301080703735352, "C": -14.558061599731445, "D": -12.563972473144531, "E": -14.490650177001953 } } }, { "ex_id": "aqua-test-175", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -1.3755264282226562, "scores": { "A": -12.588652610778809, "B": -11.279211044311523, "C": -12.724414825439453, "D": -13.617168426513672, "E": -12.65473747253418 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.200804233551025, "scores": { "A": -7.825118541717529, "B": -12.34547233581543, "C": -15.431029319763184, "D": -11.632364273071289, "E": -14.025922775268555 } } }, { "ex_id": "aqua-test-176", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -2.7927188873291016, "scores": { "A": -10.986503601074219, "B": -10.264165878295898, "C": -13.056884765625, "D": -13.231691360473633, "E": -11.577075958251953 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.047300338745117, "scores": { "A": -8.569661140441895, "B": -10.96971607208252, "C": -12.616961479187012, "D": -10.226570129394531, "E": -10.359309196472168 } } }, { "ex_id": "aqua-test-177", "gold": "C", "baseline": { "pred_label": "A", "correct": false, "margin": -4.117347717285156, "scores": { "A": -9.619380950927734, "B": -9.922872543334961, "C": -13.73672866821289, "D": -11.895669937133789, "E": -11.525716781616211 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.858722686767578, "scores": { "A": -5.202523231506348, "B": -7.391201972961426, "C": -12.061245918273926, "D": -9.575565338134766, "E": -11.126143455505371 } } }, { "ex_id": "aqua-test-178", "gold": "E", "baseline": { "pred_label": "E", "correct": true, "margin": 0.44650745391845703, "scores": { "A": -13.206219673156738, "B": -11.094629287719727, "C": -12.79085922241211, "D": -12.61279582977295, "E": -10.64812183380127 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.734641075134277, "scores": { "A": -5.792222023010254, "B": -10.40644359588623, "C": -8.512224197387695, "D": -10.881692886352539, "E": -12.526863098144531 } } }, { "ex_id": "aqua-test-179", "gold": "B", "baseline": { "pred_label": "A", "correct": false, "margin": -1.587289810180664, "scores": { "A": -9.154937744140625, "B": -10.742227554321289, "C": -11.76572322845459, "D": -12.661623001098633, "E": -12.793743133544922 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.158479690551758, "scores": { "A": -5.91640567779541, "B": -8.074885368347168, "C": -10.336216926574707, "D": -10.604473114013672, "E": -12.273855209350586 } } }, { "ex_id": "aqua-test-180", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -2.503520965576172, "scores": { "A": -11.160909652709961, "B": -9.851707458496094, "C": -12.355228424072266, "D": -13.170286178588867, "E": -12.383331298828125 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.526637077331543, "scores": { "A": -10.631556510925293, "B": -11.933387756347656, "C": -12.158193588256836, "D": -11.02450942993164, "E": -11.620341300964355 } } }, { "ex_id": "aqua-test-181", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.7682161331176758, "scores": { "A": -9.629287719726562, "B": -8.861071586608887, "C": -11.832342147827148, "D": -11.63463020324707, "E": -10.680866241455078 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.5154037475585938, "scores": { "A": -7.565939903259277, "B": -9.081343650817871, "C": -10.455299377441406, "D": -9.157304763793945, "E": -9.032361030578613 } } }, { "ex_id": "aqua-test-182", "gold": "C", "baseline": { "pred_label": "A", "correct": false, "margin": -3.821702003479004, "scores": { "A": -11.69933032989502, "B": -15.636759757995605, "C": -15.521032333374023, "D": -15.261280059814453, "E": -15.790119171142578 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.4775166511535645, "scores": { "A": -7.3024001121521, "B": -11.616369247436523, "C": -13.779916763305664, "D": -14.841501235961914, "E": -15.299184799194336 } } }, { "ex_id": "aqua-test-183", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 1.4096593856811523, "scores": { "A": -12.662055969238281, "B": -9.372528076171875, "C": -10.782187461853027, "D": -13.160992622375488, "E": -13.141705513000488 } }, "ablated": { "pred_label": "C", "correct": false, "margin": -0.7217111587524414, "scores": { "A": -8.667959213256836, "B": -7.500253200531006, "C": -6.7785420417785645, "D": -9.29892349243164, "E": -10.76202392578125 } } }, { "ex_id": "aqua-test-184", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -3.1710405349731445, "scores": { "A": -11.335744857788086, "B": -9.919331550598145, "C": -11.165321350097656, "D": -13.090372085571289, "E": -12.288164138793945 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.067633628845215, "scores": { "A": -8.7251615524292, "B": -11.410130500793457, "C": -11.521978378295898, "D": -10.792795181274414, "E": -11.264982223510742 } } }, { "ex_id": "aqua-test-185", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -3.1894311904907227, "scores": { "A": -11.377983093261719, "B": -9.404431343078613, "C": -12.593862533569336, "D": -12.444841384887695, "E": -12.661911964416504 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -8.37000560760498, "scores": { "A": -5.967945098876953, "B": -12.820409774780273, "C": -14.337950706481934, "D": -13.287762641906738, "E": -14.76830005645752 } } }, { "ex_id": "aqua-test-186", "gold": "A", "baseline": { "pred_label": "A", "correct": true, "margin": 0.3980731964111328, "scores": { "A": -11.276969909667969, "B": -11.675043106079102, "C": -14.097780227661133, "D": -14.689929962158203, "E": -13.63922119140625 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 2.4271883964538574, "scores": { "A": -7.584385395050049, "B": -10.011573791503906, "C": -13.923393249511719, "D": -12.747108459472656, "E": -12.790593147277832 } } }, { "ex_id": "aqua-test-187", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -2.0718564987182617, "scores": { "A": -12.528532028198242, "B": -10.982802391052246, "C": -13.054658889770508, "D": -14.590221405029297, "E": -14.210501670837402 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.2736144065856934, "scores": { "A": -7.474527835845947, "B": -9.265643119812012, "C": -10.74814224243164, "D": -11.135126113891602, "E": -11.180526733398438 } } }, { "ex_id": "aqua-test-188", "gold": "E", "baseline": { "pred_label": "A", "correct": false, "margin": -3.121814727783203, "scores": { "A": -12.144158363342285, "B": -13.037331581115723, "C": -13.73487377166748, "D": -14.09709358215332, "E": -15.265973091125488 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -5.235786437988281, "scores": { "A": -11.129180908203125, "B": -13.703113555908203, "C": -16.397157669067383, "D": -12.766101837158203, "E": -16.364967346191406 } } }, { "ex_id": "aqua-test-189", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.20896244049072266, "scores": { "A": -14.54034423828125, "B": -11.554760932922363, "C": -11.811978340148926, "D": -11.763723373413086, "E": -13.348597526550293 } }, "ablated": { "pred_label": "D", "correct": false, "margin": -0.8642768859863281, "scores": { "A": -9.882810592651367, "B": -10.419057846069336, "C": -10.307378768920898, "D": -9.554780960083008, "E": -9.593378067016602 } } }, { "ex_id": "aqua-test-190", "gold": "C", "baseline": { "pred_label": "C", "correct": true, "margin": 0.35360145568847656, "scores": { "A": -13.596860885620117, "B": -10.771349906921387, "C": -10.41774845123291, "D": -13.349145889282227, "E": -13.912391662597656 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.489037036895752, "scores": { "A": -6.3273138999938965, "B": -9.247300148010254, "C": -12.816350936889648, "D": -10.787364959716797, "E": -12.917289733886719 } } }, { "ex_id": "aqua-test-191", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.9574899673461914, "scores": { "A": -11.895600318908691, "B": -10.9381103515625, "C": -13.633337020874023, "D": -14.099964141845703, "E": -13.749225616455078 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.093125343322754, "scores": { "A": -6.434209823608398, "B": -8.527335166931152, "C": -11.775838851928711, "D": -11.290367126464844, "E": -12.324054718017578 } } }, { "ex_id": "aqua-test-192", "gold": "A", "baseline": { "pred_label": "C", "correct": false, "margin": -0.11513137817382812, "scores": { "A": -13.689857482910156, "B": -13.662582397460938, "C": -13.574726104736328, "D": -14.454401016235352, "E": -13.602828979492188 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 1.6858510971069336, "scores": { "A": -8.88024616241455, "B": -12.6011962890625, "C": -13.179601669311523, "D": -11.137103080749512, "E": -10.566097259521484 } } }, { "ex_id": "aqua-test-193", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -2.7755870819091797, "scores": { "A": -11.522911071777344, "B": -8.747323989868164, "C": -9.848695755004883, "D": -10.299760818481445, "E": -10.045204162597656 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 1.2680301666259766, "scores": { "A": -8.59853458404541, "B": -10.327086448669434, "C": -10.31645679473877, "D": -9.866564750671387, "E": -9.983407974243164 } } }, { "ex_id": "aqua-test-194", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -2.1696929931640625, "scores": { "A": -9.464816093444824, "B": -9.411468505859375, "C": -10.639501571655273, "D": -11.581161499023438, "E": -12.365375518798828 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.69554328918457, "scores": { "A": -7.166990280151367, "B": -12.7379150390625, "C": -14.002235412597656, "D": -13.862533569335938, "E": -15.497852325439453 } } }, { "ex_id": "aqua-test-195", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -4.067173004150391, "scores": { "A": -11.221086502075195, "B": -8.251949310302734, "C": -10.201787948608398, "D": -12.319122314453125, "E": -12.611976623535156 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.214510917663574, "scores": { "A": -8.04576301574707, "B": -9.183307647705078, "C": -9.281808853149414, "D": -10.260273933410645, "E": -10.553353309631348 } } }, { "ex_id": "aqua-test-196", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -1.8260202407836914, "scores": { "A": -12.807720184326172, "B": -10.98169994354248, "C": -11.949183464050293, "D": -12.729838371276855, "E": -14.811100006103516 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 1.4137496948242188, "scores": { "A": -9.012284278869629, "B": -11.167215347290039, "C": -11.748849868774414, "D": -10.426033973693848, "E": -12.733590126037598 } } }, { "ex_id": "aqua-test-197", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -0.6986474990844727, "scores": { "A": -10.992959022521973, "B": -8.939371109008789, "C": -9.638018608093262, "D": -9.939753532409668, "E": -10.469696998596191 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.4545230865478516, "scores": { "A": -8.536977767944336, "B": -10.056846618652344, "C": -11.991500854492188, "D": -8.791147232055664, "E": -9.266132354736328 } } }, { "ex_id": "aqua-test-198", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -0.42527008056640625, "scores": { "A": -11.604852676391602, "B": -9.509223937988281, "C": -9.934494018554688, "D": -13.406452178955078, "E": -11.831525802612305 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -0.08353328704833984, "scores": { "A": -9.30318546295166, "B": -11.121411323547363, "C": -9.38671875, "D": -11.00162410736084, "E": -11.414689064025879 } } }, { "ex_id": "aqua-test-199", "gold": "C", "baseline": { "pred_label": "A", "correct": false, "margin": -5.215788841247559, "scores": { "A": -10.268373489379883, "B": -14.280426025390625, "C": -15.484162330627441, "D": -16.044178009033203, "E": -13.105344772338867 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -5.14111328125, "scores": { "A": -9.658950805664062, "B": -12.755328178405762, "C": -14.800064086914062, "D": -14.560892105102539, "E": -15.274332046508789 } } }, { "ex_id": "aqua-test-200", "gold": "B", "baseline": { "pred_label": "E", "correct": false, "margin": -0.2883167266845703, "scores": { "A": -12.25977897644043, "B": -11.99930477142334, "C": -13.633522033691406, "D": -12.022893905639648, "E": -11.71098804473877 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.265233039855957, "scores": { "A": -7.928126335144043, "B": -11.193359375, "C": -13.55146598815918, "D": -9.998331069946289, "E": -9.261889457702637 } } }, { "ex_id": "aqua-test-201", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -0.5516147613525391, "scores": { "A": -10.827482223510742, "B": -10.275867462158203, "C": -10.488014221191406, "D": -11.649810791015625, "E": -12.461782455444336 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 1.869196891784668, "scores": { "A": -9.49152660369873, "B": -11.884713172912598, "C": -13.91677188873291, "D": -11.360723495483398, "E": -14.611146926879883 } } }, { "ex_id": "aqua-test-202", "gold": "B", "baseline": { "pred_label": "E", "correct": false, "margin": -1.6743907928466797, "scores": { "A": -11.541190147399902, "B": -11.870600700378418, "C": -10.76689338684082, "D": -11.914441108703613, "E": -10.196209907531738 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.4919681549072266, "scores": { "A": -6.873357772827148, "B": -10.365325927734375, "C": -13.835872650146484, "D": -13.012420654296875, "E": -13.628089904785156 } } }, { "ex_id": "aqua-test-203", "gold": "D", "baseline": { "pred_label": "C", "correct": false, "margin": -2.915132522583008, "scores": { "A": -9.924501419067383, "B": -9.837858200073242, "C": -8.515426635742188, "D": -11.430559158325195, "E": -10.028295516967773 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -0.9277362823486328, "scores": { "A": -8.02107048034668, "B": -9.770700454711914, "C": -10.0626220703125, "D": -8.948806762695312, "E": -10.232612609863281 } } }, { "ex_id": "aqua-test-204", "gold": "C", "baseline": { "pred_label": "A", "correct": false, "margin": -0.054775238037109375, "scores": { "A": -9.659589767456055, "B": -9.96756362915039, "C": -9.714365005493164, "D": -11.157163619995117, "E": -10.775384902954102 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.617983341217041, "scores": { "A": -6.012364864349365, "B": -10.016752243041992, "C": -10.630348205566406, "D": -11.478163719177246, "E": -11.222594261169434 } } }, { "ex_id": "aqua-test-205", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -2.753251075744629, "scores": { "A": -9.721230506896973, "B": -8.277044296264648, "C": -10.387093544006348, "D": -11.78427791595459, "E": -11.030295372009277 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.736443042755127, "scores": { "A": -7.844253063201904, "B": -9.998329162597656, "C": -11.479426383972168, "D": -11.290699005126953, "E": -12.580696105957031 } } }, { "ex_id": "aqua-test-206", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.18168067932128906, "scores": { "A": -11.602930068969727, "B": -10.961795806884766, "C": -11.143476486206055, "D": -12.837438583374023, "E": -14.00632095336914 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.0302047729492188, "scores": { "A": -8.013933181762695, "B": -11.044137954711914, "C": -12.337331771850586, "D": -11.77204704284668, "E": -14.158761024475098 } } }, { "ex_id": "aqua-test-207", "gold": "E", "baseline": { "pred_label": "C", "correct": false, "margin": -3.332465171813965, "scores": { "A": -12.935659408569336, "B": -13.335750579833984, "C": -10.853610038757324, "D": -15.803115844726562, "E": -14.186075210571289 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -8.302919387817383, "scores": { "A": -6.643090724945068, "B": -11.031190872192383, "C": -12.203893661499023, "D": -15.097414016723633, "E": -14.94601058959961 } } }, { "ex_id": "aqua-test-208", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -0.8153915405273438, "scores": { "A": -12.07571029663086, "B": -10.124650955200195, "C": -11.729717254638672, "D": -10.940042495727539, "E": -13.055669784545898 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.7086524963378906, "scores": { "A": -9.813956260681152, "B": -11.153253555297852, "C": -13.102922439575195, "D": -11.522608757019043, "E": -12.09807300567627 } } }, { "ex_id": "aqua-test-209", "gold": "E", "baseline": { "pred_label": "E", "correct": true, "margin": 0.9387226104736328, "scores": { "A": -10.875650405883789, "B": -9.770784378051758, "C": -13.167339324951172, "D": -10.202999114990234, "E": -8.832061767578125 } }, "ablated": { "pred_label": "E", "correct": true, "margin": 0.02905750274658203, "scores": { "A": -6.857519149780273, "B": -8.824195861816406, "C": -11.01872730255127, "D": -7.9446258544921875, "E": -6.828461647033691 } } }, { "ex_id": "aqua-test-210", "gold": "E", "baseline": { "pred_label": "A", "correct": false, "margin": -0.5599374771118164, "scores": { "A": -10.820873260498047, "B": -10.8345947265625, "C": -10.908698081970215, "D": -12.616942405700684, "E": -11.380810737609863 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -0.7156219482421875, "scores": { "A": -8.604469299316406, "B": -10.140972137451172, "C": -9.98969841003418, "D": -9.52783203125, "E": -9.320091247558594 } } }, { "ex_id": "aqua-test-211", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -2.5010337829589844, "scores": { "A": -9.232921600341797, "B": -8.809791564941406, "C": -10.77252197265625, "D": -11.31082534790039, "E": -9.859048843383789 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.274179458618164, "scores": { "A": -6.612133979797363, "B": -7.950355529785156, "C": -10.069632530212402, "D": -8.886313438415527, "E": -8.989130020141602 } } }, { "ex_id": "aqua-test-212", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 1.515838623046875, "scores": { "A": -11.57960319519043, "B": -9.604219436645508, "C": -11.120058059692383, "D": -11.739898681640625, "E": -12.83167839050293 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.382899284362793, "scores": { "A": -4.981387138366699, "B": -8.364286422729492, "C": -11.265626907348633, "D": -9.413225173950195, "E": -11.893355369567871 } } }, { "ex_id": "aqua-test-213", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -2.7208938598632812, "scores": { "A": -13.409758567810059, "B": -12.505935668945312, "C": -15.226829528808594, "D": -15.019231796264648, "E": -13.97078800201416 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.587521553039551, "scores": { "A": -7.238009452819824, "B": -11.375316619873047, "C": -13.825531005859375, "D": -12.193073272705078, "E": -12.065244674682617 } } }, { "ex_id": "aqua-test-214", "gold": "E", "baseline": { "pred_label": "C", "correct": false, "margin": -3.253298759460449, "scores": { "A": -10.732587814331055, "B": -9.240824699401855, "C": -8.385510444641113, "D": -11.663347244262695, "E": -11.638809204101562 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.647817611694336, "scores": { "A": -7.310015678405762, "B": -10.763197898864746, "C": -10.126215934753418, "D": -9.662254333496094, "E": -10.957833290100098 } } }, { "ex_id": "aqua-test-215", "gold": "A", "baseline": { "pred_label": "C", "correct": false, "margin": -6.795169830322266, "scores": { "A": -22.99382781982422, "B": -18.132843017578125, "C": -16.198657989501953, "D": -22.130014419555664, "E": -22.1824893951416 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 0.6526165008544922, "scores": { "A": -8.266807556152344, "B": -9.545059204101562, "C": -8.919424057006836, "D": -11.000604629516602, "E": -11.521963119506836 } } }, { "ex_id": "aqua-test-216", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -0.6342000961303711, "scores": { "A": -12.792722702026367, "B": -9.749557495117188, "C": -10.383757591247559, "D": -12.744852066040039, "E": -12.79257583618164 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.935434341430664, "scores": { "A": -7.502462387084961, "B": -9.681203842163086, "C": -10.437896728515625, "D": -10.083349227905273, "E": -10.514593124389648 } } }, { "ex_id": "aqua-test-217", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -3.419353485107422, "scores": { "A": -12.823972702026367, "B": -8.596864700317383, "C": -12.047069549560547, "D": -15.540777206420898, "E": -12.016218185424805 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.360109329223633, "scores": { "A": -6.694489479064941, "B": -7.449652671813965, "C": -9.901532173156738, "D": -10.334230422973633, "E": -9.054598808288574 } } }, { "ex_id": "aqua-test-218", "gold": "A", "baseline": { "pred_label": "A", "correct": true, "margin": 0.45700550079345703, "scores": { "A": -9.913926124572754, "B": -10.370931625366211, "C": -11.4942045211792, "D": -12.226011276245117, "E": -11.360005378723145 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 2.9241886138916016, "scores": { "A": -6.717060089111328, "B": -9.64124870300293, "C": -12.240556716918945, "D": -12.319646835327148, "E": -12.892836570739746 } } }, { "ex_id": "aqua-test-219", "gold": "C", "baseline": { "pred_label": "A", "correct": false, "margin": -0.8197288513183594, "scores": { "A": -10.761848449707031, "B": -10.766995429992676, "C": -11.58157730102539, "D": -11.309408187866211, "E": -10.906158447265625 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -5.418631553649902, "scores": { "A": -6.581605911254883, "B": -10.751213073730469, "C": -12.000237464904785, "D": -12.113553047180176, "E": -13.230367660522461 } } }, { "ex_id": "aqua-test-220", "gold": "E", "baseline": { "pred_label": "A", "correct": false, "margin": -1.4230737686157227, "scores": { "A": -9.55500316619873, "B": -10.829366683959961, "C": -12.407928466796875, "D": -12.210527420043945, "E": -10.978076934814453 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -8.069978713989258, "scores": { "A": -6.391247272491455, "B": -12.834989547729492, "C": -14.329586029052734, "D": -11.875961303710938, "E": -14.461225509643555 } } }, { "ex_id": "aqua-test-221", "gold": "C", "baseline": { "pred_label": "C", "correct": true, "margin": 0.1843414306640625, "scores": { "A": -13.276018142700195, "B": -8.882017135620117, "C": -8.697675704956055, "D": -11.192451477050781, "E": -12.547571182250977 } }, "ablated": { "pred_label": "C", "correct": true, "margin": 0.23614788055419922, "scores": { "A": -7.982804298400879, "B": -8.148253440856934, "C": -7.680639266967773, "D": -9.554062843322754, "E": -7.916787147521973 } } }, { "ex_id": "aqua-test-222", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -3.8982372283935547, "scores": { "A": -10.80961799621582, "B": -9.526689529418945, "C": -12.049747467041016, "D": -13.4249267578125, "E": -14.212126731872559 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.658107757568359, "scores": { "A": -6.3620195388793945, "B": -8.394012451171875, "C": -11.750795364379883, "D": -11.020127296447754, "E": -11.600711822509766 } } }, { "ex_id": "aqua-test-223", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 3.090752601623535, "scores": { "A": -10.285022735595703, "B": -7.155424118041992, "C": -10.246176719665527, "D": -10.93359375, "E": -11.335384368896484 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.092008590698242, "scores": { "A": -4.6826276779174805, "B": -8.774636268615723, "C": -12.371101379394531, "D": -11.170863151550293, "E": -13.101846694946289 } } }, { "ex_id": "aqua-test-224", "gold": "D", "baseline": { "pred_label": "A", "correct": false, "margin": -3.33624267578125, "scores": { "A": -9.653928756713867, "B": -10.031352996826172, "C": -10.045028686523438, "D": -12.990171432495117, "E": -13.378705978393555 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -7.7593231201171875, "scores": { "A": -6.229616165161133, "B": -9.94646167755127, "C": -11.709576606750488, "D": -13.98893928527832, "E": -14.078731536865234 } } }, { "ex_id": "aqua-test-225", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -2.9616122245788574, "scores": { "A": -9.889963150024414, "B": -7.453649997711182, "C": -8.71200942993164, "D": -10.415262222290039, "E": -10.100379943847656 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.348393440246582, "scores": { "A": -7.418603897094727, "B": -8.728095054626465, "C": -9.700243949890137, "D": -8.766997337341309, "E": -10.309947967529297 } } }, { "ex_id": "aqua-test-226", "gold": "E", "baseline": { "pred_label": "C", "correct": false, "margin": -0.807805061340332, "scores": { "A": -9.130120277404785, "B": -8.975384712219238, "C": -8.7509126663208, "D": -10.859039306640625, "E": -9.558717727661133 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.761455535888672, "scores": { "A": -5.507261276245117, "B": -8.399494171142578, "C": -9.425889015197754, "D": -8.896235466003418, "E": -8.268716812133789 } } }, { "ex_id": "aqua-test-227", "gold": "B", "baseline": { "pred_label": "C", "correct": false, "margin": -2.220867156982422, "scores": { "A": -14.620243072509766, "B": -13.570455551147461, "C": -11.349588394165039, "D": -11.514875411987305, "E": -11.799421310424805 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.755785942077637, "scores": { "A": -8.976465225219727, "B": -13.732251167297363, "C": -14.044622421264648, "D": -10.583160400390625, "E": -14.6570405960083 } } }, { "ex_id": "aqua-test-228", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.06908798217773438, "scores": { "A": -13.294260025024414, "B": -10.70706558227539, "C": -10.776153564453125, "D": -14.082728385925293, "E": -14.882830619812012 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.2796850204467773, "scores": { "A": -7.156650543212891, "B": -8.436335563659668, "C": -9.495584487915039, "D": -10.117116928100586, "E": -8.917889595031738 } } }, { "ex_id": "aqua-test-229", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -4.243276596069336, "scores": { "A": -13.26862907409668, "B": -9.949518203735352, "C": -14.192794799804688, "D": -13.284774780273438, "E": -10.620906829833984 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.551429748535156, "scores": { "A": -7.223015785217285, "B": -8.682937622070312, "C": -11.774445533752441, "D": -8.85659408569336, "E": -8.720208168029785 } } }, { "ex_id": "aqua-test-230", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -0.39587879180908203, "scores": { "A": -9.808959007263184, "B": -9.706989288330078, "C": -10.10286808013916, "D": -11.030524253845215, "E": -11.224230766296387 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.8247079849243164, "scores": { "A": -7.614650726318359, "B": -10.1624174118042, "C": -10.439358711242676, "D": -8.709356307983398, "E": -9.494927406311035 } } }, { "ex_id": "aqua-test-231", "gold": "E", "baseline": { "pred_label": "C", "correct": false, "margin": -1.5809459686279297, "scores": { "A": -14.210000991821289, "B": -11.604068756103516, "C": -10.744209289550781, "D": -14.023458480834961, "E": -12.325155258178711 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.2639245986938477, "scores": { "A": -6.26756477355957, "B": -7.757082939147949, "C": -7.7208757400512695, "D": -8.944690704345703, "E": -8.531489372253418 } } }, { "ex_id": "aqua-test-232", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -3.394481658935547, "scores": { "A": -10.429929733276367, "B": -8.23812484741211, "C": -10.50958251953125, "D": -10.852899551391602, "E": -11.632606506347656 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.812300682067871, "scores": { "A": -6.170710563659668, "B": -8.007887840270996, "C": -9.030508041381836, "D": -8.483603477478027, "E": -10.983011245727539 } } }, { "ex_id": "aqua-test-233", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -2.781177520751953, "scores": { "A": -14.271334648132324, "B": -11.490157127380371, "C": -12.547819137573242, "D": -12.209135055541992, "E": -13.584470748901367 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 0.4316978454589844, "scores": { "A": -9.124231338500977, "B": -9.750629425048828, "C": -10.782835006713867, "D": -9.555929183959961, "E": -11.292634963989258 } } }, { "ex_id": "aqua-test-234", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -1.7564897537231445, "scores": { "A": -11.012960433959961, "B": -9.256470680236816, "C": -11.204137802124023, "D": -11.356718063354492, "E": -11.360950469970703 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 3.251865863800049, "scores": { "A": -5.590026378631592, "B": -8.84189224243164, "C": -12.897504806518555, "D": -9.304344177246094, "E": -11.349357604980469 } } }, { "ex_id": "aqua-test-235", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -2.0572433471679688, "scores": { "A": -11.001571655273438, "B": -8.944328308105469, "C": -10.169864654541016, "D": -11.891605377197266, "E": -10.417022705078125 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 1.4765863418579102, "scores": { "A": -7.271252632141113, "B": -8.747838973999023, "C": -9.36155891418457, "D": -10.69479751586914, "E": -9.831280708312988 } } }, { "ex_id": "aqua-test-236", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -0.42516517639160156, "scores": { "A": -10.618110656738281, "B": -10.19294548034668, "C": -10.441062927246094, "D": -13.496698379516602, "E": -11.223958969116211 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 2.4833927154541016, "scores": { "A": -7.2196502685546875, "B": -9.703042984008789, "C": -9.82933235168457, "D": -12.09399700164795, "E": -12.26220989227295 } } }, { "ex_id": "aqua-test-237", "gold": "D", "baseline": { "pred_label": "A", "correct": false, "margin": -2.568614959716797, "scores": { "A": -14.478540420532227, "B": -14.94182014465332, "C": -18.723243713378906, "D": -17.047155380249023, "E": -17.837533950805664 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.232480049133301, "scores": { "A": -8.632494926452637, "B": -12.854435920715332, "C": -15.726949691772461, "D": -11.864974975585938, "E": -14.554712295532227 } } }, { "ex_id": "aqua-test-238", "gold": "B", "baseline": { "pred_label": "D", "correct": false, "margin": -0.7253379821777344, "scores": { "A": -12.960672378540039, "B": -12.267729759216309, "C": -12.900382995605469, "D": -11.542391777038574, "E": -13.208388328552246 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.6987123489379883, "scores": { "A": -8.835456848144531, "B": -11.53416919708252, "C": -12.405285835266113, "D": -10.377281188964844, "E": -12.166045188903809 } } }, { "ex_id": "aqua-test-239", "gold": "A", "baseline": { "pred_label": "C", "correct": false, "margin": -1.5075511932373047, "scores": { "A": -10.213048934936523, "B": -9.266622543334961, "C": -8.705497741699219, "D": -12.055715560913086, "E": -11.688860893249512 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 3.3363800048828125, "scores": { "A": -6.8600263595581055, "B": -10.196406364440918, "C": -11.205855369567871, "D": -13.323162078857422, "E": -12.765695571899414 } } }, { "ex_id": "aqua-test-240", "gold": "E", "baseline": { "pred_label": "D", "correct": false, "margin": -0.44985103607177734, "scores": { "A": -15.143648147583008, "B": -14.28840446472168, "C": -18.351917266845703, "D": -14.222793579101562, "E": -14.67264461517334 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.2502288818359375, "scores": { "A": -9.40713882446289, "B": -11.652059555053711, "C": -14.1944580078125, "D": -11.099084854125977, "E": -12.657367706298828 } } }, { "ex_id": "aqua-test-241", "gold": "D", "baseline": { "pred_label": "B", "correct": false, "margin": -4.373614311218262, "scores": { "A": -10.808015823364258, "B": -8.874855041503906, "C": -10.419958114624023, "D": -13.248469352722168, "E": -12.38242244720459 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.841573715209961, "scores": { "A": -8.555763244628906, "B": -12.585406303405762, "C": -12.206342697143555, "D": -12.397336959838867, "E": -11.421467781066895 } } }, { "ex_id": "aqua-test-242", "gold": "C", "baseline": { "pred_label": "A", "correct": false, "margin": -2.8799476623535156, "scores": { "A": -9.328302383422852, "B": -10.89498519897461, "C": -12.208250045776367, "D": -12.965248107910156, "E": -11.71044921875 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -7.6321868896484375, "scores": { "A": -6.5676727294921875, "B": -11.704992294311523, "C": -14.199859619140625, "D": -14.442285537719727, "E": -14.39041519165039 } } }, { "ex_id": "aqua-test-243", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -6.371611595153809, "scores": { "A": -15.65360164642334, "B": -9.281990051269531, "C": -11.275196075439453, "D": -13.858661651611328, "E": -12.75399398803711 } }, "ablated": { "pred_label": "B", "correct": false, "margin": -1.5444035530090332, "scores": { "A": -8.703460693359375, "B": -7.159057140350342, "C": -9.622814178466797, "D": -11.823188781738281, "E": -12.516307830810547 } } }, { "ex_id": "aqua-test-244", "gold": "A", "baseline": { "pred_label": "C", "correct": false, "margin": -2.365335464477539, "scores": { "A": -13.74870491027832, "B": -12.62894058227539, "C": -11.383369445800781, "D": -12.943933486938477, "E": -12.694005966186523 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 3.0395584106445312, "scores": { "A": -6.322815895080566, "B": -9.362374305725098, "C": -10.0859956741333, "D": -10.893784523010254, "E": -12.42809772491455 } } }, { "ex_id": "aqua-test-245", "gold": "C", "baseline": { "pred_label": "B", "correct": false, "margin": -0.3645496368408203, "scores": { "A": -11.810342788696289, "B": -11.149602890014648, "C": -11.514152526855469, "D": -13.367142677307129, "E": -13.141554832458496 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.503069877624512, "scores": { "A": -7.401782989501953, "B": -13.384897232055664, "C": -13.904852867126465, "D": -14.09384822845459, "E": -15.8029146194458 } } }, { "ex_id": "aqua-test-246", "gold": "B", "baseline": { "pred_label": "E", "correct": false, "margin": -1.197403907775879, "scores": { "A": -13.818582534790039, "B": -14.779563903808594, "C": -15.105998992919922, "D": -14.267341613769531, "E": -13.582159996032715 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.314557075500488, "scores": { "A": -9.649901390075684, "B": -13.964458465576172, "C": -16.527318954467773, "D": -12.236238479614258, "E": -13.367414474487305 } } }, { "ex_id": "aqua-test-247", "gold": "A", "baseline": { "pred_label": "B", "correct": false, "margin": -4.482232093811035, "scores": { "A": -15.076606750488281, "B": -10.594374656677246, "C": -13.674264907836914, "D": -13.522222518920898, "E": -13.801025390625 } }, "ablated": { "pred_label": "A", "correct": true, "margin": 0.8972492218017578, "scores": { "A": -9.801979064941406, "B": -11.38330078125, "C": -12.110631942749023, "D": -10.699228286743164, "E": -12.660514831542969 } } }, { "ex_id": "aqua-test-248", "gold": "D", "baseline": { "pred_label": "E", "correct": false, "margin": -3.480362892150879, "scores": { "A": -14.004387855529785, "B": -13.113205909729004, "C": -13.897481918334961, "D": -15.721760749816895, "E": -12.241397857666016 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.1865768432617188, "scores": { "A": -8.702154159545898, "B": -9.582113265991211, "C": -11.823348999023438, "D": -11.888731002807617, "E": -9.788232803344727 } } }, { "ex_id": "aqua-test-249", "gold": "C", "baseline": { "pred_label": "C", "correct": true, "margin": 0.821441650390625, "scores": { "A": -10.321834564208984, "B": -8.848502159118652, "C": -8.027060508728027, "D": -11.628623962402344, "E": -11.091792106628418 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.602280616760254, "scores": { "A": -6.007650375366211, "B": -7.634098052978516, "C": -8.609930992126465, "D": -8.443798065185547, "E": -8.685563087463379 } } }, { "ex_id": "aqua-test-250", "gold": "E", "baseline": { "pred_label": "C", "correct": false, "margin": -1.9709901809692383, "scores": { "A": -10.511480331420898, "B": -11.609233856201172, "C": -10.008577346801758, "D": -14.576160430908203, "E": -11.979567527770996 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -7.598491668701172, "scores": { "A": -8.06104850769043, "B": -11.577505111694336, "C": -12.608949661254883, "D": -14.756206512451172, "E": -15.659540176391602 } } }, { "ex_id": "aqua-test-251", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.07524585723876953, "scores": { "A": -9.908409118652344, "B": -9.833163261413574, "C": -12.424334526062012, "D": -11.275071144104004, "E": -10.72103214263916 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.978281021118164, "scores": { "A": -9.318894386291504, "B": -12.297175407409668, "C": -13.513100624084473, "D": -12.114720344543457, "E": -11.161179542541504 } } }, { "ex_id": "aqua-test-252", "gold": "C", "baseline": { "pred_label": "E", "correct": false, "margin": -0.18950843811035156, "scores": { "A": -10.37520980834961, "B": -9.517382621765137, "C": -9.654356002807617, "D": -10.586039543151855, "E": -9.464847564697266 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.239790916442871, "scores": { "A": -6.840622901916504, "B": -8.500364303588867, "C": -9.080413818359375, "D": -9.140447616577148, "E": -8.753503799438477 } } }, { "ex_id": "aqua-test-253", "gold": "E", "baseline": { "pred_label": "B", "correct": false, "margin": -3.153468132019043, "scores": { "A": -12.344278335571289, "B": -10.064801216125488, "C": -10.924477577209473, "D": -12.967808723449707, "E": -13.218269348144531 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -7.156527519226074, "scores": { "A": -4.787992477416992, "B": -6.941324234008789, "C": -8.889881134033203, "D": -9.577656745910645, "E": -11.944519996643066 } } } ], "flip_rows": [ { "ex_id": "aqua-test-2", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.2555389404296875, "scores": { "A": -11.233211517333984, "B": -10.210750579833984, "C": -13.17569351196289, "D": -12.437894821166992, "E": -10.466289520263672 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -7.949008941650391, "scores": { "A": -6.06699275970459, "B": -14.01600170135498, "C": -17.137845993041992, "D": -15.27363109588623, "E": -15.64785099029541 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 1.3901653289794922, "scores": { "A": -9.876066207885742, "B": -8.48590087890625, "C": -10.311349868774414, "D": -10.88787841796875, "E": -10.712956428527832 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 0.25553417205810547, "scores": { "A": -11.233206748962402, "B": -10.210748672485352, "C": -13.175691604614258, "D": -12.437891006469727, "E": -10.466282844543457 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 0.25553417205810547, "scores": { "A": -11.233206748962402, "B": -10.210748672485352, "C": -13.175691604614258, "D": -12.437891006469727, "E": -10.466282844543457 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -9.454591751098633, "scores": { "A": -5.419614791870117, "B": -14.87420654296875, "C": -18.10893440246582, "D": -16.861085891723633, "E": -17.0190372467041 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 1.7489757537841797, "scores": { "A": -10.085409164428711, "B": -8.336433410644531, "C": -10.132183074951172, "D": -10.820955276489258, "E": -10.653312683105469 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -6.734705924987793, "scores": { "A": -4.145016670227051, "B": -10.879722595214844, "C": -13.257842063903809, "D": -13.037062644958496, "E": -12.864790916442871 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -7.94901180267334, "scores": { "A": -6.066986083984375, "B": -14.015997886657715, "C": -17.13784408569336, "D": -15.273627281188965, "E": -15.647849082946777 } } }, { "ex_id": "aqua-test-5", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.953394889831543, "scores": { "A": -11.989723205566406, "B": -10.97428035736084, "C": -12.035185813903809, "D": -11.961091041564941, "E": -11.927675247192383 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.1877222061157227, "scores": { "A": -7.596570014953613, "B": -9.784292221069336, "C": -11.036355018615723, "D": -9.200647354125977, "E": -10.078826904296875 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 0.16080760955810547, "scores": { "A": -9.10636043548584, "B": -8.253414154052734, "C": -8.41422176361084, "D": -9.317205429077148, "E": -9.607017517089844 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 0.9533920288085938, "scores": { "A": -11.989713668823242, "B": -10.974275588989258, "C": -12.035181045532227, "D": -11.96108627319336, "E": -11.927667617797852 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 0.9533920288085938, "scores": { "A": -11.989713668823242, "B": -10.974275588989258, "C": -12.035181045532227, "D": -11.96108627319336, "E": -11.927667617797852 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -1.1618080139160156, "scores": { "A": -4.276651382446289, "B": -5.438459396362305, "C": -7.316925048828125, "D": -5.728630065917969, "E": -5.395031929016113 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 0.10317325592041016, "scores": { "A": -9.095845222473145, "B": -8.302581787109375, "C": -8.405755043029785, "D": -9.327710151672363, "E": -9.620680809020996 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -2.9572091102600098, "scores": { "A": -7.572333812713623, "B": -10.529542922973633, "C": -11.914779663085938, "D": -11.758302688598633, "E": -10.997583389282227 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -2.1877198219299316, "scores": { "A": -7.59656286239624, "B": -9.784282684326172, "C": -11.036344528198242, "D": -9.200637817382812, "E": -10.078821182250977 } } }, { "ex_id": "aqua-test-9", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 1.0833330154418945, "scores": { "A": -11.261035919189453, "B": -8.873366355895996, "C": -9.95669937133789, "D": -12.33233642578125, "E": -13.964797973632812 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.0129852294921875, "scores": { "A": -7.305376052856445, "B": -11.318361282348633, "C": -11.48718547821045, "D": -13.66738224029541, "E": -15.269938468933105 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 1.1214942932128906, "scores": { "A": -10.059557914733887, "B": -7.437822341918945, "C": -8.559316635131836, "D": -10.860220909118652, "E": -12.348688125610352 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 1.0833320617675781, "scores": { "A": -11.26103401184082, "B": -8.873364448547363, "C": -9.956696510314941, "D": -12.332334518432617, "E": -13.964792251586914 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 1.0833320617675781, "scores": { "A": -11.26103401184082, "B": -8.873364448547363, "C": -9.956696510314941, "D": -12.332334518432617, "E": -13.964792251586914 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -3.571709632873535, "scores": { "A": -4.2528157234191895, "B": -7.824525356292725, "C": -7.8429999351501465, "D": -10.935527801513672, "E": -12.20101547241211 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 1.0738887786865234, "scores": { "A": -9.967976570129395, "B": -7.351049423217773, "C": -8.424938201904297, "D": -10.737732887268066, "E": -12.164700508117676 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -3.493661880493164, "scores": { "A": -6.91084098815918, "B": -10.404502868652344, "C": -11.570510864257812, "D": -13.403081893920898, "E": -13.38132095336914 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -4.012986183166504, "scores": { "A": -7.3053789138793945, "B": -11.318365097045898, "C": -11.487188339233398, "D": -13.667381286621094, "E": -15.269935607910156 } } }, { "ex_id": "aqua-test-15", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.6604747772216797, "scores": { "A": -11.07632064819336, "B": -10.41584587097168, "C": -13.610551834106445, "D": -15.297096252441406, "E": -13.782489776611328 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.743229389190674, "scores": { "A": -6.1119704246521, "B": -10.855199813842773, "C": -11.251523971557617, "D": -11.053302764892578, "E": -13.566537857055664 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 0.7353744506835938, "scores": { "A": -9.318084716796875, "B": -8.582710266113281, "C": -10.295574188232422, "D": -11.628917694091797, "E": -11.761164665222168 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 0.6604728698730469, "scores": { "A": -11.076318740844727, "B": -10.41584587097168, "C": -13.610550880432129, "D": -15.297094345092773, "E": -13.782489776611328 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 0.6604728698730469, "scores": { "A": -11.076318740844727, "B": -10.41584587097168, "C": -13.610550880432129, "D": -15.297094345092773, "E": -13.782489776611328 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -3.7481865882873535, "scores": { "A": -7.6179327964782715, "B": -11.366119384765625, "C": -11.271610260009766, "D": -11.955974578857422, "E": -14.104389190673828 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 0.4087352752685547, "scores": { "A": -9.301450729370117, "B": -8.892715454101562, "C": -10.551101684570312, "D": -11.64991569519043, "E": -11.84024429321289 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -3.2450222969055176, "scores": { "A": -5.506385326385498, "B": -8.751407623291016, "C": -10.76029109954834, "D": -10.876399040222168, "E": -11.42264461517334 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -4.743227005004883, "scores": { "A": -6.111969947814941, "B": -10.855196952819824, "C": -11.251523971557617, "D": -11.053295135498047, "E": -13.566534042358398 } } }, { "ex_id": "aqua-test-16", "gold": "C", "baseline": { "pred_label": "C", "correct": true, "margin": 2.796067237854004, "scores": { "A": -12.479905128479004, "B": -10.507231712341309, "C": -7.711164474487305, "D": -12.827747344970703, "E": -12.807977676391602 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -0.861086368560791, "scores": { "A": -7.834758281707764, "B": -9.467061996459961, "C": -8.695844650268555, "D": -9.597942352294922, "E": -11.696287155151367 } }, "patched_0": { "pred_label": "C", "correct": true, "margin": 1.3996143341064453, "scores": { "A": -10.024466514587402, "B": -9.449155807495117, "C": -8.049541473388672, "D": -10.041764259338379, "E": -11.3864164352417 } }, "patched_01": { "pred_label": "C", "correct": true, "margin": 2.7960658073425293, "scores": { "A": -12.479902267456055, "B": -10.507226943969727, "C": -7.711161136627197, "D": -12.827741622924805, "E": -12.807975769042969 } }, "patched_full": { "pred_label": "C", "correct": true, "margin": 2.7960658073425293, "scores": { "A": -12.479902267456055, "B": -10.507226943969727, "C": -7.711161136627197, "D": -12.827741622924805, "E": -12.807975769042969 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -1.8937911987304688, "scores": { "A": -9.395095825195312, "B": -11.238801956176758, "C": -11.288887023925781, "D": -11.238336563110352, "E": -13.501079559326172 } }, "control_time_shuffled": { "pred_label": "C", "correct": true, "margin": 1.4178781509399414, "scores": { "A": -9.956263542175293, "B": -9.215209007263184, "C": -7.797330856323242, "D": -9.89533805847168, "E": -11.219152450561523 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -0.8867502212524414, "scores": { "A": -6.518403053283691, "B": -7.824748992919922, "C": -7.405153274536133, "D": -6.778932571411133, "E": -8.86016845703125 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -0.8610877990722656, "scores": { "A": -7.83476448059082, "B": -9.467066764831543, "C": -8.695852279663086, "D": -9.597952842712402, "E": -11.696298599243164 } } }, { "ex_id": "aqua-test-21", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.7691888809204102, "scores": { "A": -10.39490795135498, "B": -9.62571907043457, "C": -12.538268089294434, "D": -12.220020294189453, "E": -11.351235389709473 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.5374608039855957, "scores": { "A": -6.129680156707764, "B": -9.66714096069336, "C": -10.800978660583496, "D": -10.67288875579834, "E": -11.187762260437012 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 1.620218276977539, "scores": { "A": -9.997575759887695, "B": -8.377357482910156, "C": -10.22830867767334, "D": -10.391581535339355, "E": -11.034626960754395 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 0.7691860198974609, "scores": { "A": -10.394902229309082, "B": -9.625716209411621, "C": -12.538259506225586, "D": -12.220011711120605, "E": -11.351226806640625 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 0.7691860198974609, "scores": { "A": -10.394902229309082, "B": -9.625716209411621, "C": -12.538259506225586, "D": -12.220011711120605, "E": -11.351226806640625 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -4.422626972198486, "scores": { "A": -5.9301066398620605, "B": -10.352733612060547, "C": -10.554861068725586, "D": -10.926000595092773, "E": -12.400789260864258 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 1.7119264602661133, "scores": { "A": -10.018722534179688, "B": -8.272918701171875, "C": -9.984845161437988, "D": -10.223934173583984, "E": -10.815324783325195 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -2.364543914794922, "scores": { "A": -10.713298797607422, "B": -13.077842712402344, "C": -13.290660858154297, "D": -12.429567337036133, "E": -14.533975601196289 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -3.5374622344970703, "scores": { "A": -6.129676818847656, "B": -9.667139053344727, "C": -10.800968170166016, "D": -10.67288589477539, "E": -11.18775463104248 } } }, { "ex_id": "aqua-test-25", "gold": "C", "baseline": { "pred_label": "C", "correct": true, "margin": 0.06520843505859375, "scores": { "A": -12.949111938476562, "B": -12.246522903442383, "C": -12.181314468383789, "D": -12.397541046142578, "E": -13.614669799804688 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.5791339874267578, "scores": { "A": -8.643856048583984, "B": -10.894746780395508, "C": -10.222990036010742, "D": -9.472063064575195, "E": -10.86764144897461 } }, "patched_0": { "pred_label": "B", "correct": false, "margin": -0.1788043975830078, "scores": { "A": -9.795440673828125, "B": -8.286870002746582, "C": -8.46567440032959, "D": -9.396930694580078, "E": -10.46731185913086 } }, "patched_01": { "pred_label": "C", "correct": true, "margin": 0.06521415710449219, "scores": { "A": -12.949119567871094, "B": -12.246532440185547, "C": -12.181318283081055, "D": -12.397550582885742, "E": -13.614681243896484 } }, "patched_full": { "pred_label": "C", "correct": true, "margin": 0.06521415710449219, "scores": { "A": -12.949119567871094, "B": -12.246532440185547, "C": -12.181318283081055, "D": -12.397550582885742, "E": -13.614681243896484 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -1.7965564727783203, "scores": { "A": -5.875622272491455, "B": -8.843538284301758, "C": -7.672178745269775, "D": -7.943771839141846, "E": -9.51207447052002 } }, "control_time_shuffled": { "pred_label": "B", "correct": false, "margin": -0.07470321655273438, "scores": { "A": -9.802331924438477, "B": -8.519807815551758, "C": -8.594511032104492, "D": -9.518583297729492, "E": -10.49337387084961 } }, "control_shared_randvec": { "pred_label": "C", "correct": true, "margin": 0.5046224594116211, "scores": { "A": -7.623855113983154, "B": -8.038482666015625, "C": -6.829197406768799, "D": -7.33381986618042, "E": -8.407615661621094 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -1.579136848449707, "scores": { "A": -8.643851280212402, "B": -10.894744873046875, "C": -10.22298812866211, "D": -9.472061157226562, "E": -10.86764144897461 } } }, { "ex_id": "aqua-test-33", "gold": "C", "baseline": { "pred_label": "C", "correct": true, "margin": 1.2154502868652344, "scores": { "A": -17.279247283935547, "B": -18.187232971191406, "C": -16.063796997070312, "D": -19.143869400024414, "E": -19.470874786376953 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -0.4487724304199219, "scores": { "A": -9.145519256591797, "B": -10.157659530639648, "C": -9.594291687011719, "D": -10.095281600952148, "E": -10.523807525634766 } }, "patched_0": { "pred_label": "C", "correct": true, "margin": 0.9382915496826172, "scores": { "A": -10.902482032775879, "B": -9.475619316101074, "C": -8.537327766418457, "D": -10.088809967041016, "E": -11.045086860656738 } }, "patched_01": { "pred_label": "C", "correct": true, "margin": 1.2154521942138672, "scores": { "A": -17.27924156188965, "B": -18.187225341796875, "C": -16.06378936767578, "D": -19.143863677978516, "E": -19.470867156982422 } }, "patched_full": { "pred_label": "C", "correct": true, "margin": 1.2154521942138672, "scores": { "A": -17.27924156188965, "B": -18.187225341796875, "C": -16.06378936767578, "D": -19.143863677978516, "E": -19.470867156982422 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -0.7954683303833008, "scores": { "A": -6.031587600708008, "B": -6.501745223999023, "C": -6.827055931091309, "D": -8.70362663269043, "E": -9.60076904296875 } }, "control_time_shuffled": { "pred_label": "C", "correct": true, "margin": 0.8526973724365234, "scores": { "A": -10.536826133728027, "B": -8.904891014099121, "C": -8.052193641662598, "D": -9.56973648071289, "E": -10.570178031921387 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -3.2699460983276367, "scores": { "A": -6.845300674438477, "B": -7.745532989501953, "C": -10.115246772766113, "D": -9.863080024719238, "E": -9.416638374328613 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -0.44877052307128906, "scores": { "A": -9.145517349243164, "B": -10.157659530639648, "C": -9.594287872314453, "D": -10.095277786254883, "E": -10.523809432983398 } } }, { "ex_id": "aqua-test-39", "gold": "A", "baseline": { "pred_label": "A", "correct": true, "margin": 1.704728126525879, "scores": { "A": -10.207995414733887, "B": -11.912723541259766, "C": -12.109935760498047, "D": -14.276583671569824, "E": -13.992156982421875 } }, "ablated": { "pred_label": "C", "correct": false, "margin": -0.4076976776123047, "scores": { "A": -10.194977760314941, "B": -10.153923988342285, "C": -9.787280082702637, "D": -11.554168701171875, "E": -10.806174278259277 } }, "patched_0": { "pred_label": "C", "correct": false, "margin": -0.2574920654296875, "scores": { "A": -9.42642593383789, "B": -9.74307632446289, "C": -9.168933868408203, "D": -10.273834228515625, "E": -9.463874816894531 } }, "patched_01": { "pred_label": "A", "correct": true, "margin": 1.7047252655029297, "scores": { "A": -10.207992553710938, "B": -11.912717819213867, "C": -12.109930038452148, "D": -14.276582717895508, "E": -13.99215316772461 } }, "patched_full": { "pred_label": "A", "correct": true, "margin": 1.7047252655029297, "scores": { "A": -10.207992553710938, "B": -11.912717819213867, "C": -12.109930038452148, "D": -14.276582717895508, "E": -13.99215316772461 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": true, "margin": 0.4817485809326172, "scores": { "A": -8.8492431640625, "B": -9.330991744995117, "C": -9.828338623046875, "D": -11.093040466308594, "E": -10.96759033203125 } }, "control_time_shuffled": { "pred_label": "C", "correct": false, "margin": -0.15412521362304688, "scores": { "A": -9.60675048828125, "B": -10.024761199951172, "C": -9.452625274658203, "D": -10.476186752319336, "E": -9.598836898803711 } }, "control_shared_randvec": { "pred_label": "C", "correct": false, "margin": -0.17638206481933594, "scores": { "A": -9.201507568359375, "B": -9.28551959991455, "C": -9.025125503540039, "D": -10.745898246765137, "E": -10.075105667114258 } }, "control_patch_nonshared": { "pred_label": "C", "correct": false, "margin": -0.4076995849609375, "scores": { "A": -10.194982528686523, "B": -10.153924942016602, "C": -9.787282943725586, "D": -11.554170608520508, "E": -10.806177139282227 } } }, { "ex_id": "aqua-test-47", "gold": "E", "baseline": { "pred_label": "E", "correct": true, "margin": 0.20550537109375, "scores": { "A": -11.954267501831055, "B": -12.503751754760742, "C": -12.114371299743652, "D": -13.045472145080566, "E": -11.748762130737305 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.0157623291015625, "scores": { "A": -9.386420249938965, "B": -11.835212707519531, "C": -13.338075637817383, "D": -12.148918151855469, "E": -13.402182579040527 } }, "patched_0": { "pred_label": "C", "correct": false, "margin": -0.8217716217041016, "scores": { "A": -11.986827850341797, "B": -12.437498092651367, "C": -11.280074119567871, "D": -12.387033462524414, "E": -12.101845741271973 } }, "patched_01": { "pred_label": "E", "correct": true, "margin": 0.20550537109375, "scores": { "A": -11.954263687133789, "B": -12.503748893737793, "C": -12.114365577697754, "D": -13.045466423034668, "E": -11.748758316040039 } }, "patched_full": { "pred_label": "E", "correct": true, "margin": 0.20550537109375, "scores": { "A": -11.954263687133789, "B": -12.503748893737793, "C": -12.114365577697754, "D": -13.045466423034668, "E": -11.748758316040039 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -3.8673534393310547, "scores": { "A": -8.755905151367188, "B": -11.388154983520508, "C": -13.46282958984375, "D": -11.259201049804688, "E": -12.623258590698242 } }, "control_time_shuffled": { "pred_label": "C", "correct": false, "margin": -0.8881673812866211, "scores": { "A": -11.76992416381836, "B": -12.467824935913086, "C": -11.171812057495117, "D": -12.422957420349121, "E": -12.059979438781738 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -3.76485538482666, "scores": { "A": -6.755273818969727, "B": -9.245403289794922, "C": -11.986200332641602, "D": -10.68335247039795, "E": -10.520129203796387 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -4.0157623291015625, "scores": { "A": -9.386425018310547, "B": -11.835214614868164, "C": -13.338083267211914, "D": -12.148921966552734, "E": -13.40218734741211 } } }, { "ex_id": "aqua-test-52", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.04578971862792969, "scores": { "A": -12.882274627685547, "B": -9.855215072631836, "C": -9.901004791259766, "D": -11.499755859375, "E": -10.678110122680664 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -0.4922494888305664, "scores": { "A": -5.136632442474365, "B": -5.628881931304932, "C": -6.605200290679932, "D": -6.88695764541626, "E": -6.429419994354248 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 0.19986343383789062, "scores": { "A": -10.225756645202637, "B": -6.857089042663574, "C": -7.056952476501465, "D": -9.577616691589355, "E": -9.770869255065918 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 0.04578971862792969, "scores": { "A": -12.882272720336914, "B": -9.855213165283203, "C": -9.901002883911133, "D": -11.499753952026367, "E": -10.678108215332031 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 0.04578971862792969, "scores": { "A": -12.882272720336914, "B": -9.855213165283203, "C": -9.901002883911133, "D": -11.499753952026367, "E": -10.678108215332031 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "B", "correct": true, "margin": 0.052666664123535156, "scores": { "A": -9.966214179992676, "B": -9.754355430603027, "C": -12.090447425842285, "D": -10.788393020629883, "E": -9.807022094726562 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 0.09168434143066406, "scores": { "A": -10.196533203125, "B": -6.679924488067627, "C": -6.771608829498291, "D": -9.311458587646484, "E": -9.494468688964844 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -0.650324821472168, "scores": { "A": -10.846677780151367, "B": -11.497002601623535, "C": -15.689571380615234, "D": -14.434303283691406, "E": -13.255485534667969 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -0.4922494888305664, "scores": { "A": -5.136631488800049, "B": -5.628880977630615, "C": -6.605197429656982, "D": -6.886956691741943, "E": -6.429421901702881 } } }, { "ex_id": "aqua-test-57", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.14557647705078125, "scores": { "A": -13.898555755615234, "B": -12.992910385131836, "C": -14.371723175048828, "D": -14.158893585205078, "E": -13.138486862182617 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.6548147201538086, "scores": { "A": -10.324930191040039, "B": -12.979744911193848, "C": -12.848653793334961, "D": -12.86312484741211, "E": -12.547582626342773 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 0.3462409973144531, "scores": { "A": -12.841978073120117, "B": -12.433549880981445, "C": -13.434564590454102, "D": -13.763471603393555, "E": -12.779790878295898 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 0.14557647705078125, "scores": { "A": -13.898560523986816, "B": -12.992914199829102, "C": -14.371731758117676, "D": -14.158897399902344, "E": -13.138490676879883 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 0.14557647705078125, "scores": { "A": -13.898560523986816, "B": -12.992914199829102, "C": -14.371731758117676, "D": -14.158897399902344, "E": -13.138490676879883 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -1.6947002410888672, "scores": { "A": -8.799111366271973, "B": -10.49381160736084, "C": -10.079784393310547, "D": -11.49155044555664, "E": -11.507024765014648 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 0.21564388275146484, "scores": { "A": -12.912389755249023, "B": -12.696745872497559, "C": -13.669480323791504, "D": -13.951339721679688, "E": -12.927538871765137 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -1.7318696975708008, "scores": { "A": -10.47717571258545, "B": -12.20904541015625, "C": -12.458807945251465, "D": -13.046340942382812, "E": -11.932548522949219 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -2.6548194885253906, "scores": { "A": -10.324928283691406, "B": -12.979747772216797, "C": -12.848652839660645, "D": -12.863126754760742, "E": -12.547584533691406 } } }, { "ex_id": "aqua-test-68", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.6945219039916992, "scores": { "A": -11.65401840209961, "B": -10.95949649810791, "C": -11.869510650634766, "D": -12.070514678955078, "E": -12.618841171264648 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.171473503112793, "scores": { "A": -9.029966354370117, "B": -11.20143985748291, "C": -11.244144439697266, "D": -11.500038146972656, "E": -10.598958015441895 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 0.03911018371582031, "scores": { "A": -10.191217422485352, "B": -9.777387619018555, "C": -9.816497802734375, "D": -11.066892623901367, "E": -10.560269355773926 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 0.6945209503173828, "scores": { "A": -11.654026985168457, "B": -10.959506034851074, "C": -11.869518280029297, "D": -12.07052230834961, "E": -12.61884593963623 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 0.6945209503173828, "scores": { "A": -11.654026985168457, "B": -10.959506034851074, "C": -11.869518280029297, "D": -12.07052230834961, "E": -12.61884593963623 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -2.0571775436401367, "scores": { "A": -8.271219253540039, "B": -10.328396797180176, "C": -9.616171836853027, "D": -10.940016746520996, "E": -9.240631103515625 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 0.05071544647216797, "scores": { "A": -10.167059898376465, "B": -9.610998153686523, "C": -9.661713600158691, "D": -11.012078285217285, "E": -10.494278907775879 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -1.482576847076416, "scores": { "A": -6.736016750335693, "B": -8.21859359741211, "C": -8.19207763671875, "D": -8.997150421142578, "E": -7.911620616912842 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -2.1714725494384766, "scores": { "A": -9.029961585998535, "B": -11.201434135437012, "C": -11.244138717651367, "D": -11.50003433227539, "E": -10.598953247070312 } } }, { "ex_id": "aqua-test-78", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 3.0808143615722656, "scores": { "A": -12.794174194335938, "B": -8.323003768920898, "C": -11.403818130493164, "D": -13.768218994140625, "E": -13.847496032714844 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.647706031799316, "scores": { "A": -5.634004592895508, "B": -10.281710624694824, "C": -11.297346115112305, "D": -12.075166702270508, "E": -12.413890838623047 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 1.6711091995239258, "scores": { "A": -10.68982982635498, "B": -7.3767290115356445, "C": -9.04783821105957, "D": -11.847247123718262, "E": -11.526897430419922 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 3.0808143615722656, "scores": { "A": -12.794168472290039, "B": -8.322998046875, "C": -11.403812408447266, "D": -13.768211364746094, "E": -13.847491264343262 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 3.0808143615722656, "scores": { "A": -12.794168472290039, "B": -8.322998046875, "C": -11.403812408447266, "D": -13.768211364746094, "E": -13.847491264343262 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -3.6936893463134766, "scores": { "A": -5.412115097045898, "B": -9.105804443359375, "C": -10.826802253723145, "D": -11.795136451721191, "E": -9.426908493041992 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 1.8150687217712402, "scores": { "A": -11.064653396606445, "B": -7.3195881843566895, "C": -9.13465690612793, "D": -11.857183456420898, "E": -11.807918548583984 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -2.0817832946777344, "scores": { "A": -6.042266845703125, "B": -8.12405014038086, "C": -8.708685874938965, "D": -9.721263885498047, "E": -7.989676475524902 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -4.647706985473633, "scores": { "A": -5.633998870849609, "B": -10.281705856323242, "C": -11.297343254089355, "D": -12.075161933898926, "E": -12.413885116577148 } } }, { "ex_id": "aqua-test-87", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.6314544677734375, "scores": { "A": -9.793952941894531, "B": -9.162498474121094, "C": -11.231021881103516, "D": -12.002910614013672, "E": -11.467964172363281 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.9217519760131836, "scores": { "A": -6.585877418518066, "B": -9.50762939453125, "C": -9.712257385253906, "D": -9.212251663208008, "E": -11.261186599731445 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 1.4228715896606445, "scores": { "A": -9.903858184814453, "B": -8.480986595153809, "C": -10.398112297058105, "D": -10.929282188415527, "E": -11.117732048034668 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 0.6314525604248047, "scores": { "A": -9.793954849243164, "B": -9.16250228881836, "C": -11.231022834777832, "D": -12.002912521362305, "E": -11.467966079711914 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 0.6314525604248047, "scores": { "A": -9.793954849243164, "B": -9.16250228881836, "C": -11.231022834777832, "D": -12.002912521362305, "E": -11.467966079711914 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -2.8357162475585938, "scores": { "A": -4.554224967956543, "B": -7.389941215515137, "C": -8.258695602416992, "D": -7.952755928039551, "E": -9.503331184387207 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 1.1273679733276367, "scores": { "A": -9.8775634765625, "B": -8.750195503234863, "C": -10.547350883483887, "D": -10.983156204223633, "E": -11.062116622924805 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -2.535149574279785, "scores": { "A": -3.4644904136657715, "B": -5.999639987945557, "C": -6.94714879989624, "D": -6.253420352935791, "E": -7.40266752243042 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -2.921750068664551, "scores": { "A": -6.585874557495117, "B": -9.507624626159668, "C": -9.712258338928223, "D": -9.212246894836426, "E": -11.261183738708496 } } }, { "ex_id": "aqua-test-100", "gold": "C", "baseline": { "pred_label": "C", "correct": true, "margin": 0.1513805389404297, "scores": { "A": -9.272323608398438, "B": -9.739631652832031, "C": -9.120943069458008, "D": -10.063505172729492, "E": -10.608749389648438 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.166820049285889, "scores": { "A": -4.778280735015869, "B": -9.417329788208008, "C": -10.945100784301758, "D": -11.501747131347656, "E": -13.226821899414062 } }, "patched_0": { "pred_label": "C", "correct": true, "margin": 0.42541027069091797, "scores": { "A": -8.251370429992676, "B": -8.534682273864746, "C": -7.825960159301758, "D": -9.516815185546875, "E": -10.607525825500488 } }, "patched_01": { "pred_label": "C", "correct": true, "margin": 0.1513843536376953, "scores": { "A": -9.272323608398438, "B": -9.739627838134766, "C": -9.120939254760742, "D": -10.063497543334961, "E": -10.608743667602539 } }, "patched_full": { "pred_label": "C", "correct": true, "margin": 0.1513843536376953, "scores": { "A": -9.272323608398438, "B": -9.739627838134766, "C": -9.120939254760742, "D": -10.063497543334961, "E": -10.608743667602539 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -5.7572784423828125, "scores": { "A": -5.2430009841918945, "B": -9.474717140197754, "C": -11.000279426574707, "D": -10.994614601135254, "E": -11.884474754333496 } }, "control_time_shuffled": { "pred_label": "C", "correct": true, "margin": 0.5618510246276855, "scores": { "A": -8.301986694335938, "B": -8.291629791259766, "C": -7.72977876663208, "D": -9.342042922973633, "E": -10.563093185424805 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -6.407781600952148, "scores": { "A": -5.431789398193359, "B": -11.169084548950195, "C": -11.839570999145508, "D": -14.098028182983398, "E": -14.046358108520508 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -6.166818618774414, "scores": { "A": -4.778277397155762, "B": -9.417325019836426, "C": -10.945096015930176, "D": -11.501741409301758, "E": -13.226816177368164 } } }, { "ex_id": "aqua-test-103", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 1.1493282318115234, "scores": { "A": -9.748441696166992, "B": -8.529296875, "C": -9.693557739257812, "D": -11.449222564697266, "E": -9.678625106811523 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -0.7451763153076172, "scores": { "A": -7.868520736694336, "B": -8.613697052001953, "C": -10.544960975646973, "D": -9.806873321533203, "E": -8.439764022827148 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 1.2809290885925293, "scores": { "A": -8.969255447387695, "B": -7.4534783363342285, "C": -8.759740829467773, "D": -10.289947509765625, "E": -8.734407424926758 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 1.1493244171142578, "scores": { "A": -9.748445510864258, "B": -8.529302597045898, "C": -9.693565368652344, "D": -11.449226379394531, "E": -9.678627014160156 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 1.1493244171142578, "scores": { "A": -9.748445510864258, "B": -8.529302597045898, "C": -9.693565368652344, "D": -11.449226379394531, "E": -9.678627014160156 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "E", "correct": false, "margin": -0.2049875259399414, "scores": { "A": -5.457864761352539, "B": -5.467221260070801, "C": -8.277172088623047, "D": -7.82890510559082, "E": -5.262233734130859 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 0.9715185165405273, "scores": { "A": -8.980949401855469, "B": -7.786107063293457, "C": -9.11854076385498, "D": -10.403557777404785, "E": -8.757625579833984 } }, "control_shared_randvec": { "pred_label": "E", "correct": false, "margin": -1.1303739547729492, "scores": { "A": -7.216065406799316, "B": -7.767756462097168, "C": -9.236971855163574, "D": -8.464241981506348, "E": -6.637382507324219 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -0.7451763153076172, "scores": { "A": -7.8685197830200195, "B": -8.613696098327637, "C": -10.544960021972656, "D": -9.806873321533203, "E": -8.439760208129883 } } }, { "ex_id": "aqua-test-105", "gold": "C", "baseline": { "pred_label": "C", "correct": true, "margin": 3.0392799377441406, "scores": { "A": -11.515534400939941, "B": -12.032148361206055, "C": -8.4762544631958, "D": -13.967401504516602, "E": -13.267354011535645 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.133573055267334, "scores": { "A": -7.463276386260986, "B": -9.80911636352539, "C": -8.59684944152832, "D": -13.382390975952148, "E": -13.58960247039795 } }, "patched_0": { "pred_label": "C", "correct": true, "margin": 2.2149181365966797, "scores": { "A": -10.347264289855957, "B": -10.250322341918945, "C": -8.035404205322266, "D": -12.775790214538574, "E": -12.733001708984375 } }, "patched_01": { "pred_label": "C", "correct": true, "margin": 3.039278030395508, "scores": { "A": -11.515533447265625, "B": -12.032148361206055, "C": -8.476255416870117, "D": -13.967406272888184, "E": -13.267354965209961 } }, "patched_full": { "pred_label": "C", "correct": true, "margin": 3.039278030395508, "scores": { "A": -11.515533447265625, "B": -12.032148361206055, "C": -8.476255416870117, "D": -13.967406272888184, "E": -13.267354965209961 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -1.6724023818969727, "scores": { "A": -5.971221446990967, "B": -7.549536228179932, "C": -7.6436238288879395, "D": -11.26807689666748, "E": -11.341324806213379 } }, "control_time_shuffled": { "pred_label": "C", "correct": true, "margin": 2.1505050659179688, "scores": { "A": -10.598121643066406, "B": -10.289155960083008, "C": -8.138650894165039, "D": -12.586099624633789, "E": -12.69243049621582 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -1.6481122970581055, "scores": { "A": -6.949349403381348, "B": -8.987531661987305, "C": -8.597461700439453, "D": -10.919259071350098, "E": -11.619161605834961 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -1.1335716247558594, "scores": { "A": -7.4632720947265625, "B": -9.809111595153809, "C": -8.596843719482422, "D": -13.382383346557617, "E": -13.589597702026367 } } }, { "ex_id": "aqua-test-111", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.18841552734375, "scores": { "A": -9.808207511901855, "B": -9.283623695373535, "C": -9.472039222717285, "D": -10.7572660446167, "E": -11.43770980834961 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.6287879943847656, "scores": { "A": -8.439443588256836, "B": -12.068231582641602, "C": -12.49129867553711, "D": -13.331933975219727, "E": -14.553701400756836 } }, "patched_0": { "pred_label": "C", "correct": false, "margin": -0.03671741485595703, "scores": { "A": -9.123326301574707, "B": -8.150065422058105, "C": -8.113348007202148, "D": -9.43490219116211, "E": -9.247275352478027 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 0.1884136199951172, "scores": { "A": -9.808208465576172, "B": -9.283626556396484, "C": -9.472040176391602, "D": -10.757265090942383, "E": -11.43770980834961 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 0.1884136199951172, "scores": { "A": -9.808208465576172, "B": -9.283626556396484, "C": -9.472040176391602, "D": -10.757265090942383, "E": -11.43770980834961 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -2.1629467010498047, "scores": { "A": -8.84734058380127, "B": -11.010287284851074, "C": -11.342278480529785, "D": -11.88167953491211, "E": -12.936474800109863 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 0.01021575927734375, "scores": { "A": -8.867430686950684, "B": -7.981822967529297, "C": -7.992038726806641, "D": -9.280956268310547, "E": -9.115897178649902 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -2.627413272857666, "scores": { "A": -7.264729022979736, "B": -9.892142295837402, "C": -9.825416564941406, "D": -10.610671997070312, "E": -11.448965072631836 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -3.628787040710449, "scores": { "A": -8.43944263458252, "B": -12.068229675292969, "C": -12.491294860839844, "D": -13.331932067871094, "E": -14.55370044708252 } } }, { "ex_id": "aqua-test-116", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 1.7076244354248047, "scores": { "A": -12.037410736083984, "B": -9.75387191772461, "C": -11.461496353149414, "D": -11.536352157592773, "E": -11.817276000976562 } }, "ablated": { "pred_label": "D", "correct": false, "margin": -0.5289134979248047, "scores": { "A": -7.282122611999512, "B": -7.493680000305176, "C": -8.805983543395996, "D": -6.964766502380371, "E": -7.28157901763916 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 1.3713750839233398, "scores": { "A": -8.353381156921387, "B": -6.982006072998047, "C": -8.779082298278809, "D": -8.85804271697998, "E": -9.427443504333496 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 1.707632064819336, "scores": { "A": -12.037415504455566, "B": -9.753875732421875, "C": -11.461507797241211, "D": -11.536357879638672, "E": -11.817279815673828 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 1.707632064819336, "scores": { "A": -12.037415504455566, "B": -9.753875732421875, "C": -11.461507797241211, "D": -11.536357879638672, "E": -11.817279815673828 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "E", "correct": false, "margin": -0.8397531509399414, "scores": { "A": -7.520603656768799, "B": -6.9838643074035645, "C": -9.986032485961914, "D": -7.869058132171631, "E": -6.144111156463623 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 1.511284351348877, "scores": { "A": -8.105411529541016, "B": -6.594127178192139, "C": -8.44879150390625, "D": -8.556886672973633, "E": -9.028934478759766 } }, "control_shared_randvec": { "pred_label": "B", "correct": true, "margin": 0.8564348220825195, "scores": { "A": -7.948397636413574, "B": -7.091962814331055, "C": -10.485963821411133, "D": -10.730182647705078, "E": -8.363138198852539 } }, "control_patch_nonshared": { "pred_label": "D", "correct": false, "margin": -0.5289154052734375, "scores": { "A": -7.282122611999512, "B": -7.493679046630859, "C": -8.80598258972168, "D": -6.964763641357422, "E": -7.281576156616211 } } }, { "ex_id": "aqua-test-120", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.3595123291015625, "scores": { "A": -12.646347045898438, "B": -10.183612823486328, "C": -10.54312515258789, "D": -11.979488372802734, "E": -12.640970230102539 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.8485918045043945, "scores": { "A": -7.463525772094727, "B": -12.312117576599121, "C": -11.753535270690918, "D": -12.008286476135254, "E": -13.767097473144531 } }, "patched_0": { "pred_label": "C", "correct": false, "margin": -0.4900999069213867, "scores": { "A": -12.293493270874023, "B": -9.351531982421875, "C": -8.861432075500488, "D": -10.692264556884766, "E": -10.770162582397461 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 0.3595142364501953, "scores": { "A": -12.646347045898438, "B": -10.183609008789062, "C": -10.543123245239258, "D": -11.979488372802734, "E": -12.640968322753906 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 0.3595142364501953, "scores": { "A": -12.646347045898438, "B": -10.183609008789062, "C": -10.543123245239258, "D": -11.979488372802734, "E": -12.640968322753906 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -4.472604751586914, "scores": { "A": -9.625394821166992, "B": -14.097999572753906, "C": -14.751794815063477, "D": -14.831222534179688, "E": -15.224090576171875 } }, "control_time_shuffled": { "pred_label": "C", "correct": false, "margin": -0.44132232666015625, "scores": { "A": -12.334344863891602, "B": -9.446332931518555, "C": -9.005010604858398, "D": -10.665205001831055, "E": -10.99666976928711 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -3.0735368728637695, "scores": { "A": -5.083780288696289, "B": -8.157317161560059, "C": -8.276480674743652, "D": -9.385171890258789, "E": -10.254486083984375 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -4.848588466644287, "scores": { "A": -7.463529109954834, "B": -12.312117576599121, "C": -11.753534317016602, "D": -12.008285522460938, "E": -13.767098426818848 } } }, { "ex_id": "aqua-test-122", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.6598358154296875, "scores": { "A": -11.076019287109375, "B": -10.416183471679688, "C": -13.238750457763672, "D": -13.289159774780273, "E": -13.489381790161133 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.3248538970947266, "scores": { "A": -6.911991119384766, "B": -9.236845016479492, "C": -12.405698776245117, "D": -10.99496078491211, "E": -12.164006233215332 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 0.3234901428222656, "scores": { "A": -10.238473892211914, "B": -9.914983749389648, "C": -12.03645133972168, "D": -12.105175018310547, "E": -13.415177345275879 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 0.6598358154296875, "scores": { "A": -11.076021194458008, "B": -10.41618537902832, "C": -13.23875617980957, "D": -13.289161682128906, "E": -13.489385604858398 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 0.6598358154296875, "scores": { "A": -11.076021194458008, "B": -10.41618537902832, "C": -13.23875617980957, "D": -13.289161682128906, "E": -13.489385604858398 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -2.418147087097168, "scores": { "A": -5.337198257446289, "B": -7.755345344543457, "C": -12.551668167114258, "D": -11.829740524291992, "E": -12.763933181762695 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 0.2971000671386719, "scores": { "A": -10.380821228027344, "B": -10.083721160888672, "C": -12.148942947387695, "D": -12.248394966125488, "E": -13.505158424377441 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -3.0811538696289062, "scores": { "A": -4.878448963165283, "B": -7.9596028327941895, "C": -11.607043266296387, "D": -9.50536823272705, "E": -11.090916633605957 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -2.3248538970947266, "scores": { "A": -6.911995887756348, "B": -9.236849784851074, "C": -12.405706405639648, "D": -10.994964599609375, "E": -12.164009094238281 } } }, { "ex_id": "aqua-test-123", "gold": "C", "baseline": { "pred_label": "C", "correct": true, "margin": 1.928288459777832, "scores": { "A": -12.817946434020996, "B": -13.251622200012207, "C": -10.08199405670166, "D": -12.010282516479492, "E": -12.828923225402832 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.036839485168457, "scores": { "A": -7.727773666381836, "B": -10.925074577331543, "C": -11.764613151550293, "D": -11.528144836425781, "E": -13.928091049194336 } }, "patched_0": { "pred_label": "C", "correct": true, "margin": 0.1131134033203125, "scores": { "A": -11.61458969116211, "B": -11.275108337402344, "C": -11.161994934082031, "D": -11.986404418945312, "E": -12.851778030395508 } }, "patched_01": { "pred_label": "C", "correct": true, "margin": 1.9282875061035156, "scores": { "A": -12.817957878112793, "B": -13.251638412475586, "C": -10.082003593444824, "D": -12.01029109954834, "E": -12.82893180847168 } }, "patched_full": { "pred_label": "C", "correct": true, "margin": 1.9282875061035156, "scores": { "A": -12.817957878112793, "B": -13.251638412475586, "C": -10.082003593444824, "D": -12.01029109954834, "E": -12.82893180847168 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -4.267435073852539, "scores": { "A": -8.1873779296875, "B": -10.754793167114258, "C": -12.454813003540039, "D": -13.088325500488281, "E": -14.197637557983398 } }, "control_time_shuffled": { "pred_label": "C", "correct": true, "margin": 0.0443572998046875, "scores": { "A": -11.579950332641602, "B": -11.245656967163086, "C": -11.201299667358398, "D": -12.026390075683594, "E": -12.818794250488281 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -5.751577854156494, "scores": { "A": -4.197388172149658, "B": -8.354893684387207, "C": -9.948966026306152, "D": -9.155034065246582, "E": -10.130256652832031 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -4.036839485168457, "scores": { "A": -7.7277727127075195, "B": -10.925077438354492, "C": -11.764612197875977, "D": -11.528146743774414, "E": -13.928092002868652 } } }, { "ex_id": "aqua-test-125", "gold": "C", "baseline": { "pred_label": "C", "correct": true, "margin": 0.26287078857421875, "scores": { "A": -12.876455307006836, "B": -12.006429672241211, "C": -10.34354305267334, "D": -10.606413841247559, "E": -11.505398750305176 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.1885986328125, "scores": { "A": -6.909121513366699, "B": -8.705928802490234, "C": -10.0977201461792, "D": -9.862305641174316, "E": -10.177146911621094 } }, "patched_0": { "pred_label": "B", "correct": false, "margin": -0.24047374725341797, "scores": { "A": -9.58531379699707, "B": -8.929466247558594, "C": -9.169939994812012, "D": -9.5785493850708, "E": -10.480676651000977 } }, "patched_01": { "pred_label": "C", "correct": true, "margin": 0.26287078857421875, "scores": { "A": -12.876452445983887, "B": -12.006429672241211, "C": -10.34354305267334, "D": -10.606413841247559, "E": -11.50539779663086 } }, "patched_full": { "pred_label": "C", "correct": true, "margin": 0.26287078857421875, "scores": { "A": -12.876452445983887, "B": -12.006429672241211, "C": -10.34354305267334, "D": -10.606413841247559, "E": -11.50539779663086 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -3.153430461883545, "scores": { "A": -7.0063958168029785, "B": -9.034759521484375, "C": -10.159826278686523, "D": -11.367905616760254, "E": -11.336196899414062 } }, "control_time_shuffled": { "pred_label": "B", "correct": false, "margin": -0.06798362731933594, "scores": { "A": -9.642061233520508, "B": -8.649129867553711, "C": -8.717113494873047, "D": -9.228824615478516, "E": -10.170103073120117 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -2.090330123901367, "scores": { "A": -7.855500221252441, "B": -8.707620620727539, "C": -9.945830345153809, "D": -13.191256523132324, "E": -12.100184440612793 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -3.1885976791381836, "scores": { "A": -6.909127235412598, "B": -8.7059326171875, "C": -10.097724914550781, "D": -9.862310409545898, "E": -10.177148818969727 } } }, { "ex_id": "aqua-test-130", "gold": "D", "baseline": { "pred_label": "D", "correct": true, "margin": 0.5736770629882812, "scores": { "A": -12.52768611907959, "B": -11.624752044677734, "C": -14.400633811950684, "D": -11.051074981689453, "E": -12.196588516235352 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -0.6399145126342773, "scores": { "A": -7.714714050292969, "B": -7.738489151000977, "C": -10.441914558410645, "D": -8.354628562927246, "E": -8.231303215026855 } }, "patched_0": { "pred_label": "B", "correct": false, "margin": -0.8715057373046875, "scores": { "A": -10.206192016601562, "B": -8.503612518310547, "C": -9.418848037719727, "D": -9.375118255615234, "E": -9.415210723876953 } }, "patched_01": { "pred_label": "D", "correct": true, "margin": 0.5736827850341797, "scores": { "A": -12.527690887451172, "B": -11.624759674072266, "C": -14.400640487670898, "D": -11.051076889038086, "E": -12.196598052978516 } }, "patched_full": { "pred_label": "D", "correct": true, "margin": 0.5736827850341797, "scores": { "A": -12.527690887451172, "B": -11.624759674072266, "C": -14.400640487670898, "D": -11.051076889038086, "E": -12.196598052978516 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "B", "correct": false, "margin": -1.5013294219970703, "scores": { "A": -5.733486652374268, "B": -5.340871334075928, "C": -8.164079666137695, "D": -6.842200756072998, "E": -6.404123783111572 } }, "control_time_shuffled": { "pred_label": "B", "correct": false, "margin": -0.8385581970214844, "scores": { "A": -10.177495002746582, "B": -8.628342628479004, "C": -9.545053482055664, "D": -9.466900825500488, "E": -9.466954231262207 } }, "control_shared_randvec": { "pred_label": "B", "correct": false, "margin": -1.8849029541015625, "scores": { "A": -7.422325134277344, "B": -7.01991081237793, "C": -9.694180488586426, "D": -8.904813766479492, "E": -8.477489471435547 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -0.6399135589599609, "scores": { "A": -7.714714050292969, "B": -7.738491058349609, "C": -10.441915512084961, "D": -8.35462760925293, "E": -8.231302261352539 } } }, { "ex_id": "aqua-test-140", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 1.090902328491211, "scores": { "A": -12.17054557800293, "B": -10.950679779052734, "C": -12.478940963745117, "D": -12.041582107543945, "E": -12.825494766235352 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.0806522369384766, "scores": { "A": -8.631109237670898, "B": -9.711761474609375, "C": -10.810302734375, "D": -10.214776992797852, "E": -11.603350639343262 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 1.1358776092529297, "scores": { "A": -9.581466674804688, "B": -8.445589065551758, "C": -9.826594352722168, "D": -10.325157165527344, "E": -11.036417961120605 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 1.0909004211425781, "scores": { "A": -12.17054557800293, "B": -10.950679779052734, "C": -12.478940963745117, "D": -12.041580200195312, "E": -12.825498580932617 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 1.0909004211425781, "scores": { "A": -12.17054557800293, "B": -10.950679779052734, "C": -12.478940963745117, "D": -12.041580200195312, "E": -12.825498580932617 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -1.6461381912231445, "scores": { "A": -8.256159782409668, "B": -9.902297973632812, "C": -11.741201400756836, "D": -11.071731567382812, "E": -12.105072975158691 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 1.1649103164672852, "scores": { "A": -9.316244125366211, "B": -8.151333808898926, "C": -9.497949600219727, "D": -10.047868728637695, "E": -10.842238426208496 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -1.3181371688842773, "scores": { "A": -7.826546669006348, "B": -9.144683837890625, "C": -11.221202850341797, "D": -10.469319343566895, "E": -11.426633834838867 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -1.0806512832641602, "scores": { "A": -8.631110191345215, "B": -9.711761474609375, "C": -10.810298919677734, "D": -10.214774131774902, "E": -11.603349685668945 } } }, { "ex_id": "aqua-test-141", "gold": "C", "baseline": { "pred_label": "C", "correct": true, "margin": 0.3972196578979492, "scores": { "A": -15.668845176696777, "B": -14.022212028503418, "C": -12.345376968383789, "D": -12.742596626281738, "E": -13.434144973754883 } }, "ablated": { "pred_label": "E", "correct": false, "margin": -1.4827747344970703, "scores": { "A": -9.032247543334961, "B": -10.177014350891113, "C": -9.580657005310059, "D": -8.116410255432129, "E": -8.097882270812988 } }, "patched_0": { "pred_label": "C", "correct": true, "margin": 0.9053430557250977, "scores": { "A": -12.772378921508789, "B": -10.580516815185547, "C": -8.90491771697998, "D": -9.810260772705078, "E": -9.986860275268555 } }, "patched_01": { "pred_label": "C", "correct": true, "margin": 0.3972187042236328, "scores": { "A": -15.668844223022461, "B": -14.022214889526367, "C": -12.345376968383789, "D": -12.742595672607422, "E": -13.43414306640625 } }, "patched_full": { "pred_label": "C", "correct": true, "margin": 0.3972187042236328, "scores": { "A": -15.668844223022461, "B": -14.022214889526367, "C": -12.345376968383789, "D": -12.742595672607422, "E": -13.43414306640625 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "D", "correct": false, "margin": -0.9576740264892578, "scores": { "A": -8.790735244750977, "B": -9.536684036254883, "C": -9.24622917175293, "D": -8.288555145263672, "E": -8.68001937866211 } }, "control_time_shuffled": { "pred_label": "C", "correct": true, "margin": 0.9014320373535156, "scores": { "A": -12.741355895996094, "B": -10.564189910888672, "C": -8.901117324829102, "D": -9.802549362182617, "E": -9.944976806640625 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -2.398233413696289, "scores": { "A": -8.182514190673828, "B": -11.277816772460938, "C": -10.580747604370117, "D": -9.102690696716309, "E": -9.042513847351074 } }, "control_patch_nonshared": { "pred_label": "E", "correct": false, "margin": -1.4827728271484375, "scores": { "A": -9.032241821289062, "B": -10.177009582519531, "C": -9.580652236938477, "D": -8.116405487060547, "E": -8.097879409790039 } } }, { "ex_id": "aqua-test-148", "gold": "D", "baseline": { "pred_label": "D", "correct": true, "margin": 0.09283638000488281, "scores": { "A": -11.842838287353516, "B": -8.686580657958984, "C": -9.391075134277344, "D": -8.593744277954102, "E": -10.327585220336914 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -0.2917442321777344, "scores": { "A": -8.228094100952148, "B": -9.228675842285156, "C": -9.42142105102539, "D": -8.519838333129883, "E": -9.596782684326172 } }, "patched_0": { "pred_label": "B", "correct": false, "margin": -1.0341577529907227, "scores": { "A": -9.260769844055176, "B": -6.733394622802734, "C": -7.005693435668945, "D": -7.767552375793457, "E": -8.883651733398438 } }, "patched_01": { "pred_label": "D", "correct": true, "margin": 0.09284019470214844, "scores": { "A": -11.842844009399414, "B": -8.686589241027832, "C": -9.391081809997559, "D": -8.593749046325684, "E": -10.327591896057129 } }, "patched_full": { "pred_label": "D", "correct": true, "margin": 0.09284019470214844, "scores": { "A": -11.842844009399414, "B": -8.686589241027832, "C": -9.391081809997559, "D": -8.593749046325684, "E": -10.327591896057129 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -0.08560562133789062, "scores": { "A": -6.6732988357543945, "B": -7.1317548751831055, "C": -7.866713523864746, "D": -6.758904457092285, "E": -8.30843448638916 } }, "control_time_shuffled": { "pred_label": "B", "correct": false, "margin": -0.9941120147705078, "scores": { "A": -9.38866901397705, "B": -6.864222526550293, "C": -7.074477195739746, "D": -7.858334541320801, "E": -8.94621753692627 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -0.6664242744445801, "scores": { "A": -7.582589626312256, "B": -8.015180587768555, "C": -9.47393798828125, "D": -8.249013900756836, "E": -8.513860702514648 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -0.2917442321777344, "scores": { "A": -8.228096008300781, "B": -9.228679656982422, "C": -9.42142105102539, "D": -8.519840240478516, "E": -9.596784591674805 } } }, { "ex_id": "aqua-test-152", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.6472129821777344, "scores": { "A": -12.141305923461914, "B": -11.08128833770752, "C": -11.728501319885254, "D": -11.744885444641113, "E": -11.734070777893066 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.606741905212402, "scores": { "A": -8.290619850158691, "B": -12.897361755371094, "C": -16.176721572875977, "D": -13.130666732788086, "E": -13.918773651123047 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 0.7464790344238281, "scores": { "A": -10.325726509094238, "B": -9.529410362243652, "C": -10.725006103515625, "D": -10.360553741455078, "E": -10.27588939666748 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 0.6472129821777344, "scores": { "A": -12.141304969787598, "B": -11.081286430358887, "C": -11.728499412536621, "D": -11.744885444641113, "E": -11.7340726852417 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 0.6472129821777344, "scores": { "A": -12.141304969787598, "B": -11.081286430358887, "C": -11.728499412536621, "D": -11.744885444641113, "E": -11.7340726852417 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -3.180154800415039, "scores": { "A": -8.851917266845703, "B": -12.032072067260742, "C": -15.113250732421875, "D": -13.466560363769531, "E": -12.649953842163086 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 0.4894752502441406, "scores": { "A": -10.067070007324219, "B": -9.577594757080078, "C": -10.798778533935547, "D": -10.269950866699219, "E": -10.165655136108398 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -3.321539878845215, "scores": { "A": -8.855324745178223, "B": -12.176864624023438, "C": -15.197938919067383, "D": -12.816364288330078, "E": -12.050538063049316 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -4.606740951538086, "scores": { "A": -8.29061508178711, "B": -12.897356033325195, "C": -16.176713943481445, "D": -13.13066291809082, "E": -13.918767929077148 } } }, { "ex_id": "aqua-test-167", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 2.286722183227539, "scores": { "A": -13.319049835205078, "B": -10.63465690612793, "C": -12.921379089355469, "D": -16.10821533203125, "E": -14.74123764038086 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.3104257583618164, "scores": { "A": -8.05471420288086, "B": -11.365139961242676, "C": -15.134896278381348, "D": -13.336740493774414, "E": -14.394715309143066 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 1.7309093475341797, "scores": { "A": -11.87700366973877, "B": -10.14609432220459, "C": -11.895035743713379, "D": -13.453927040100098, "E": -13.269637107849121 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 2.286722183227539, "scores": { "A": -13.319046020507812, "B": -10.634654998779297, "C": -12.921377182006836, "D": -16.10821533203125, "E": -14.74123764038086 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 2.286722183227539, "scores": { "A": -13.319046020507812, "B": -10.634654998779297, "C": -12.921377182006836, "D": -16.10821533203125, "E": -14.74123764038086 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -2.7075023651123047, "scores": { "A": -8.526466369628906, "B": -11.233968734741211, "C": -16.785362243652344, "D": -15.479930877685547, "E": -15.917808532714844 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 1.8152027130126953, "scores": { "A": -12.013884544372559, "B": -10.008508682250977, "C": -11.823711395263672, "D": -13.332377433776855, "E": -13.145186424255371 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -3.2307591438293457, "scores": { "A": -7.303309917449951, "B": -10.534069061279297, "C": -14.878864288330078, "D": -14.44310188293457, "E": -14.893917083740234 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -3.3104257583618164, "scores": { "A": -8.054710388183594, "B": -11.36513614654541, "C": -15.13489055633545, "D": -13.336731910705566, "E": -14.394709587097168 } } }, { "ex_id": "aqua-test-178", "gold": "E", "baseline": { "pred_label": "E", "correct": true, "margin": 0.44650745391845703, "scores": { "A": -13.206219673156738, "B": -11.094629287719727, "C": -12.79085922241211, "D": -12.61279582977295, "E": -10.64812183380127 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.734641075134277, "scores": { "A": -5.792222023010254, "B": -10.40644359588623, "C": -8.512224197387695, "D": -10.881692886352539, "E": -12.526863098144531 } }, "patched_0": { "pred_label": "B", "correct": false, "margin": -2.943204402923584, "scores": { "A": -9.805769920349121, "B": -5.3867316246032715, "C": -6.29595947265625, "D": -9.170482635498047, "E": -8.329936027526855 } }, "patched_01": { "pred_label": "E", "correct": true, "margin": 0.4465036392211914, "scores": { "A": -13.206205368041992, "B": -11.094612121582031, "C": -12.790840148925781, "D": -12.612784385681152, "E": -10.64810848236084 } }, "patched_full": { "pred_label": "E", "correct": true, "margin": 0.4465036392211914, "scores": { "A": -13.206205368041992, "B": -11.094612121582031, "C": -12.790840148925781, "D": -12.612784385681152, "E": -10.64810848236084 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -4.96022367477417, "scores": { "A": -5.559600353240967, "B": -9.4379243850708, "C": -8.867037773132324, "D": -11.44845199584961, "E": -10.519824028015137 } }, "control_time_shuffled": { "pred_label": "B", "correct": false, "margin": -3.127613067626953, "scores": { "A": -9.935117721557617, "B": -5.414183616638184, "C": -6.386631965637207, "D": -9.185277938842773, "E": -8.541796684265137 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -6.373246669769287, "scores": { "A": -6.259435176849365, "B": -10.628332138061523, "C": -10.076188087463379, "D": -13.082308769226074, "E": -12.632681846618652 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -6.734642028808594, "scores": { "A": -5.792219161987305, "B": -10.406440734863281, "C": -8.512224197387695, "D": -10.881689071655273, "E": -12.526861190795898 } } }, { "ex_id": "aqua-test-181", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.7682161331176758, "scores": { "A": -9.629287719726562, "B": -8.861071586608887, "C": -11.832342147827148, "D": -11.63463020324707, "E": -10.680866241455078 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.5154037475585938, "scores": { "A": -7.565939903259277, "B": -9.081343650817871, "C": -10.455299377441406, "D": -9.157304763793945, "E": -9.032361030578613 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 1.180495262145996, "scores": { "A": -9.739990234375, "B": -8.418399810791016, "C": -9.598895072937012, "D": -10.474803924560547, "E": -9.851222038269043 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 0.7682132720947266, "scores": { "A": -9.629287719726562, "B": -8.861074447631836, "C": -11.832342147827148, "D": -11.634628295898438, "E": -10.680864334106445 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 0.7682132720947266, "scores": { "A": -9.629287719726562, "B": -8.861074447631836, "C": -11.832342147827148, "D": -11.634628295898438, "E": -10.680864334106445 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -0.6810026168823242, "scores": { "A": -4.672728061676025, "B": -5.35373067855835, "C": -7.516335964202881, "D": -6.342844486236572, "E": -4.79114294052124 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 1.1485824584960938, "scores": { "A": -9.888175964355469, "B": -8.739593505859375, "C": -9.970766067504883, "D": -10.718721389770508, "E": -10.002328872680664 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -0.9188671112060547, "scores": { "A": -9.10055160522461, "B": -10.019418716430664, "C": -11.787355422973633, "D": -10.639719009399414, "E": -9.353466033935547 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -1.515404224395752, "scores": { "A": -7.565932750701904, "B": -9.081336975097656, "C": -10.455291748046875, "D": -9.157295227050781, "E": -9.032354354858398 } } }, { "ex_id": "aqua-test-183", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 1.4096593856811523, "scores": { "A": -12.662055969238281, "B": -9.372528076171875, "C": -10.782187461853027, "D": -13.160992622375488, "E": -13.141705513000488 } }, "ablated": { "pred_label": "C", "correct": false, "margin": -0.7217111587524414, "scores": { "A": -8.667959213256836, "B": -7.500253200531006, "C": -6.7785420417785645, "D": -9.29892349243164, "E": -10.76202392578125 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 0.6202316284179688, "scores": { "A": -10.945490837097168, "B": -7.309451103210449, "C": -7.929682731628418, "D": -10.611489295959473, "E": -11.70968246459961 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 1.409658432006836, "scores": { "A": -12.662059783935547, "B": -9.372528076171875, "C": -10.782186508178711, "D": -13.160991668701172, "E": -13.141706466674805 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 1.409658432006836, "scores": { "A": -12.662059783935547, "B": -9.372528076171875, "C": -10.782186508178711, "D": -13.160991668701172, "E": -13.141706466674805 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "C", "correct": false, "margin": -0.8798198699951172, "scores": { "A": -10.624456405639648, "B": -9.079212188720703, "C": -8.199392318725586, "D": -13.810981750488281, "E": -14.749567031860352 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 0.6436195373535156, "scores": { "A": -10.855729103088379, "B": -7.0558881759643555, "C": -7.699507713317871, "D": -10.375727653503418, "E": -11.54419231414795 } }, "control_shared_randvec": { "pred_label": "C", "correct": false, "margin": -0.7752676010131836, "scores": { "A": -7.476681232452393, "B": -7.795263767242432, "C": -7.019996166229248, "D": -9.245484352111816, "E": -9.37934684753418 } }, "control_patch_nonshared": { "pred_label": "C", "correct": false, "margin": -0.7217121124267578, "scores": { "A": -8.667959213256836, "B": -7.500255584716797, "C": -6.778543472290039, "D": -9.298927307128906, "E": -10.762025833129883 } } }, { "ex_id": "aqua-test-189", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.20896244049072266, "scores": { "A": -14.54034423828125, "B": -11.554760932922363, "C": -11.811978340148926, "D": -11.763723373413086, "E": -13.348597526550293 } }, "ablated": { "pred_label": "D", "correct": false, "margin": -0.8642768859863281, "scores": { "A": -9.882810592651367, "B": -10.419057846069336, "C": -10.307378768920898, "D": -9.554780960083008, "E": -9.593378067016602 } }, "patched_0": { "pred_label": "C", "correct": false, "margin": -0.7312335968017578, "scores": { "A": -11.619770050048828, "B": -9.795265197753906, "C": -9.064031600952148, "D": -9.905113220214844, "E": -9.719362258911133 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 0.20896339416503906, "scores": { "A": -14.54034423828125, "B": -11.554759979248047, "C": -11.811981201171875, "D": -11.763723373413086, "E": -13.348596572875977 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 0.20896339416503906, "scores": { "A": -14.54034423828125, "B": -11.554759979248047, "C": -11.811981201171875, "D": -11.763723373413086, "E": -13.348596572875977 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "D", "correct": false, "margin": -0.6974210739135742, "scores": { "A": -9.276986122131348, "B": -9.103320121765137, "C": -10.14097785949707, "D": -8.405899047851562, "E": -8.49357795715332 } }, "control_time_shuffled": { "pred_label": "C", "correct": false, "margin": -0.7965354919433594, "scores": { "A": -11.578865051269531, "B": -9.813770294189453, "C": -9.017234802246094, "D": -9.923456192016602, "E": -9.655179977416992 } }, "control_shared_randvec": { "pred_label": "E", "correct": false, "margin": -0.7022542953491211, "scores": { "A": -6.854315757751465, "B": -6.292705535888672, "C": -6.938782215118408, "D": -5.983695030212402, "E": -5.590451240539551 } }, "control_patch_nonshared": { "pred_label": "D", "correct": false, "margin": -0.8642749786376953, "scores": { "A": -9.88280963897705, "B": -10.419052124023438, "C": -10.30737590789795, "D": -9.554777145385742, "E": -9.59337329864502 } } }, { "ex_id": "aqua-test-190", "gold": "C", "baseline": { "pred_label": "C", "correct": true, "margin": 0.35360145568847656, "scores": { "A": -13.596860885620117, "B": -10.771349906921387, "C": -10.41774845123291, "D": -13.349145889282227, "E": -13.912391662597656 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -6.489037036895752, "scores": { "A": -6.3273138999938965, "B": -9.247300148010254, "C": -12.816350936889648, "D": -10.787364959716797, "E": -12.917289733886719 } }, "patched_0": { "pred_label": "C", "correct": true, "margin": 0.29494571685791016, "scores": { "A": -10.62340259552002, "B": -9.846135139465332, "C": -9.551189422607422, "D": -10.956984519958496, "E": -11.488529205322266 } }, "patched_01": { "pred_label": "C", "correct": true, "margin": 0.35360145568847656, "scores": { "A": -13.59686279296875, "B": -10.771347999572754, "C": -10.417746543884277, "D": -13.34914493560791, "E": -13.912391662597656 } }, "patched_full": { "pred_label": "C", "correct": true, "margin": 0.35360145568847656, "scores": { "A": -13.59686279296875, "B": -10.771347999572754, "C": -10.417746543884277, "D": -13.34914493560791, "E": -13.912391662597656 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -6.209604263305664, "scores": { "A": -7.986570358276367, "B": -10.776678085327148, "C": -14.196174621582031, "D": -12.462160110473633, "E": -14.763839721679688 } }, "control_time_shuffled": { "pred_label": "C", "correct": true, "margin": 0.43008899688720703, "scores": { "A": -10.530719757080078, "B": -10.036431312561035, "C": -9.606342315673828, "D": -10.94388198852539, "E": -11.484930038452148 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -4.7598161697387695, "scores": { "A": -8.101582527160645, "B": -8.396411895751953, "C": -12.861398696899414, "D": -12.736745834350586, "E": -13.218839645385742 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -6.489035606384277, "scores": { "A": -6.327314376831055, "B": -9.247294425964355, "C": -12.816349983215332, "D": -10.787363052368164, "E": -12.917287826538086 } } }, { "ex_id": "aqua-test-191", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.9574899673461914, "scores": { "A": -11.895600318908691, "B": -10.9381103515625, "C": -13.633337020874023, "D": -14.099964141845703, "E": -13.749225616455078 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.093125343322754, "scores": { "A": -6.434209823608398, "B": -8.527335166931152, "C": -11.775838851928711, "D": -11.290367126464844, "E": -12.324054718017578 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 1.0822925567626953, "scores": { "A": -8.277904510498047, "B": -7.195611953735352, "C": -8.55109977722168, "D": -9.725017547607422, "E": -9.788521766662598 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 0.9574871063232422, "scores": { "A": -11.895593643188477, "B": -10.938106536865234, "C": -13.63333511352539, "D": -14.099959373474121, "E": -13.749227523803711 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 0.9574871063232422, "scores": { "A": -11.895593643188477, "B": -10.938106536865234, "C": -13.63333511352539, "D": -14.099959373474121, "E": -13.749227523803711 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -1.307638168334961, "scores": { "A": -6.399979591369629, "B": -7.70761775970459, "C": -11.518457412719727, "D": -11.318992614746094, "E": -11.238313674926758 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 1.0426197052001953, "scores": { "A": -8.261636734008789, "B": -7.219017028808594, "C": -8.581559181213379, "D": -9.666144371032715, "E": -9.812707901000977 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -1.6853055953979492, "scores": { "A": -7.959956169128418, "B": -9.645261764526367, "C": -13.037406921386719, "D": -12.513145446777344, "E": -9.820344924926758 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -2.093125820159912, "scores": { "A": -6.434208393096924, "B": -8.527334213256836, "C": -11.775838851928711, "D": -11.290367126464844, "E": -12.324055671691895 } } }, { "ex_id": "aqua-test-206", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.18168067932128906, "scores": { "A": -11.602930068969727, "B": -10.961795806884766, "C": -11.143476486206055, "D": -12.837438583374023, "E": -14.00632095336914 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.0302047729492188, "scores": { "A": -8.013933181762695, "B": -11.044137954711914, "C": -12.337331771850586, "D": -11.77204704284668, "E": -14.158761024475098 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 0.8052225112915039, "scores": { "A": -11.124311447143555, "B": -10.31908893585205, "C": -11.636249542236328, "D": -12.520262718200684, "E": -14.125991821289062 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 0.18168067932128906, "scores": { "A": -11.60293197631836, "B": -10.961797714233398, "C": -11.143478393554688, "D": -12.837438583374023, "E": -14.006319046020508 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 0.18168067932128906, "scores": { "A": -11.60293197631836, "B": -10.961797714233398, "C": -11.143478393554688, "D": -12.837438583374023, "E": -14.006319046020508 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -2.3860421180725098, "scores": { "A": -6.778487682342529, "B": -9.164529800415039, "C": -12.009851455688477, "D": -12.049808502197266, "E": -13.598691940307617 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 0.9240913391113281, "scores": { "A": -11.181600570678711, "B": -10.257509231567383, "C": -11.613018035888672, "D": -12.530416488647461, "E": -13.934183120727539 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -2.196244239807129, "scores": { "A": -7.369185447692871, "B": -9.5654296875, "C": -11.989535331726074, "D": -11.94742488861084, "E": -13.421416282653809 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -3.0302047729492188, "scores": { "A": -8.013933181762695, "B": -11.044137954711914, "C": -12.337331771850586, "D": -11.772050857543945, "E": -14.15876579284668 } } }, { "ex_id": "aqua-test-212", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 1.515838623046875, "scores": { "A": -11.57960319519043, "B": -9.604219436645508, "C": -11.120058059692383, "D": -11.739898681640625, "E": -12.83167839050293 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -3.382899284362793, "scores": { "A": -4.981387138366699, "B": -8.364286422729492, "C": -11.265626907348633, "D": -9.413225173950195, "E": -11.893355369567871 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 1.4837512969970703, "scores": { "A": -9.99412727355957, "B": -7.378774642944336, "C": -8.862525939941406, "D": -9.535578727722168, "E": -9.807991981506348 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 1.5158329010009766, "scores": { "A": -11.579606056213379, "B": -9.604227066040039, "C": -11.120059967041016, "D": -11.739900588989258, "E": -12.831683158874512 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 1.5158329010009766, "scores": { "A": -11.579606056213379, "B": -9.604227066040039, "C": -11.120059967041016, "D": -11.739900588989258, "E": -12.831683158874512 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -3.0996170043945312, "scores": { "A": -5.803328514099121, "B": -8.902945518493652, "C": -12.642228126525879, "D": -12.522542953491211, "E": -14.157147407531738 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 1.5297069549560547, "scores": { "A": -10.015952110290527, "B": -7.30755615234375, "C": -8.837263107299805, "D": -9.575658798217773, "E": -9.859335899353027 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -2.132758140563965, "scores": { "A": -7.283851623535156, "B": -9.416609764099121, "C": -12.729516983032227, "D": -10.042702674865723, "E": -12.341903686523438 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -3.382904052734375, "scores": { "A": -4.981382369995117, "B": -8.364286422729492, "C": -11.26562213897705, "D": -9.413222312927246, "E": -11.893354415893555 } } }, { "ex_id": "aqua-test-223", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 3.090752601623535, "scores": { "A": -10.285022735595703, "B": -7.155424118041992, "C": -10.246176719665527, "D": -10.93359375, "E": -11.335384368896484 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -4.092008590698242, "scores": { "A": -4.6826276779174805, "B": -8.774636268615723, "C": -12.371101379394531, "D": -11.170863151550293, "E": -13.101846694946289 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 2.5026049613952637, "scores": { "A": -10.422887802124023, "B": -5.899902820587158, "C": -8.402507781982422, "D": -9.963022232055664, "E": -10.305240631103516 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 3.0907530784606934, "scores": { "A": -10.285022735595703, "B": -7.155422687530518, "C": -10.246175765991211, "D": -10.933595657348633, "E": -11.335386276245117 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 3.0907530784606934, "scores": { "A": -10.285022735595703, "B": -7.155422687530518, "C": -10.246175765991211, "D": -10.933595657348633, "E": -11.335386276245117 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -2.352473258972168, "scores": { "A": -4.42826509475708, "B": -6.780738353729248, "C": -12.333539009094238, "D": -11.84121036529541, "E": -13.286885261535645 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 2.4003238677978516, "scores": { "A": -10.343778610229492, "B": -5.7316436767578125, "C": -8.131967544555664, "D": -9.640758514404297, "E": -10.208199501037598 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -3.1807565689086914, "scores": { "A": -5.085258483886719, "B": -8.26601505279541, "C": -12.408848762512207, "D": -11.484879493713379, "E": -12.500157356262207 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -4.092006683349609, "scores": { "A": -4.682626724243164, "B": -8.774633407592773, "C": -12.371101379394531, "D": -11.17086410522461, "E": -13.101846694946289 } } }, { "ex_id": "aqua-test-228", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.06908798217773438, "scores": { "A": -13.294260025024414, "B": -10.70706558227539, "C": -10.776153564453125, "D": -14.082728385925293, "E": -14.882830619812012 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -1.2796850204467773, "scores": { "A": -7.156650543212891, "B": -8.436335563659668, "C": -9.495584487915039, "D": -10.117116928100586, "E": -8.917889595031738 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 0.7942695617675781, "scores": { "A": -8.922301292419434, "B": -6.567187309265137, "C": -7.361456871032715, "D": -9.64250659942627, "E": -9.15162181854248 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 0.06908607482910156, "scores": { "A": -13.294256210327148, "B": -10.70706558227539, "C": -10.776151657104492, "D": -14.082728385925293, "E": -14.882831573486328 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 0.06908607482910156, "scores": { "A": -13.294256210327148, "B": -10.70706558227539, "C": -10.776151657104492, "D": -14.082728385925293, "E": -14.882831573486328 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -1.0041084289550781, "scores": { "A": -5.43384313583374, "B": -6.437951564788818, "C": -6.356510639190674, "D": -9.189103126525879, "E": -8.109259605407715 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 0.8438491821289062, "scores": { "A": -8.9219388961792, "B": -6.8384809494018555, "C": -7.682330131530762, "D": -9.85693073272705, "E": -9.25981616973877 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -0.8855304718017578, "scores": { "A": -6.823153495788574, "B": -7.708683967590332, "C": -9.504840850830078, "D": -8.854315757751465, "E": -7.114500999450684 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -1.2796845436096191, "scores": { "A": -7.156657695770264, "B": -8.436342239379883, "C": -9.495588302612305, "D": -10.117122650146484, "E": -8.917900085449219 } } }, { "ex_id": "aqua-test-249", "gold": "C", "baseline": { "pred_label": "C", "correct": true, "margin": 0.821441650390625, "scores": { "A": -10.321834564208984, "B": -8.848502159118652, "C": -8.027060508728027, "D": -11.628623962402344, "E": -11.091792106628418 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.602280616760254, "scores": { "A": -6.007650375366211, "B": -7.634098052978516, "C": -8.609930992126465, "D": -8.443798065185547, "E": -8.685563087463379 } }, "patched_0": { "pred_label": "B", "correct": false, "margin": -0.3504180908203125, "scores": { "A": -8.986002922058105, "B": -6.979213714599609, "C": -7.329631805419922, "D": -10.10708236694336, "E": -9.747851371765137 } }, "patched_01": { "pred_label": "C", "correct": true, "margin": 0.8214454650878906, "scores": { "A": -10.321839332580566, "B": -8.848505973815918, "C": -8.027060508728027, "D": -11.628629684448242, "E": -11.091798782348633 } }, "patched_full": { "pred_label": "C", "correct": true, "margin": 0.8214454650878906, "scores": { "A": -10.321839332580566, "B": -8.848505973815918, "C": -8.027060508728027, "D": -11.628629684448242, "E": -11.091798782348633 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -2.610713005065918, "scores": { "A": -5.542219161987305, "B": -7.004818916320801, "C": -8.152932167053223, "D": -8.9476957321167, "E": -9.82064151763916 } }, "control_time_shuffled": { "pred_label": "B", "correct": false, "margin": -0.41672515869140625, "scores": { "A": -8.892544746398926, "B": -6.766074180603027, "C": -7.182799339294434, "D": -9.836996078491211, "E": -9.51023006439209 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -2.0196499824523926, "scores": { "A": -3.997561454772949, "B": -5.764638423919678, "C": -6.017211437225342, "D": -5.872786045074463, "E": -6.263749599456787 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -2.602281093597412, "scores": { "A": -6.007654666900635, "B": -7.634103298187256, "C": -8.609935760498047, "D": -8.443801879882812, "E": -8.685571670532227 } } }, { "ex_id": "aqua-test-251", "gold": "B", "baseline": { "pred_label": "B", "correct": true, "margin": 0.07524585723876953, "scores": { "A": -9.908409118652344, "B": -9.833163261413574, "C": -12.424334526062012, "D": -11.275071144104004, "E": -10.72103214263916 } }, "ablated": { "pred_label": "A", "correct": false, "margin": -2.978281021118164, "scores": { "A": -9.318894386291504, "B": -12.297175407409668, "C": -13.513100624084473, "D": -12.114720344543457, "E": -11.161179542541504 } }, "patched_0": { "pred_label": "B", "correct": true, "margin": 0.53338623046875, "scores": { "A": -9.37672233581543, "B": -8.81983757019043, "C": -10.16126823425293, "D": -10.652963638305664, "E": -9.35322380065918 } }, "patched_01": { "pred_label": "B", "correct": true, "margin": 0.07524681091308594, "scores": { "A": -9.90841007232666, "B": -9.833163261413574, "C": -12.424333572387695, "D": -11.27507209777832, "E": -10.72103500366211 } }, "patched_full": { "pred_label": "B", "correct": true, "margin": 0.07524681091308594, "scores": { "A": -9.90841007232666, "B": -9.833163261413574, "C": -12.424333572387695, "D": -11.27507209777832, "E": -10.72103500366211 } }, "debug_max_abs_diff_patched01_vs_full": 0.0, "control_rand_subspace": { "pred_label": "A", "correct": false, "margin": -2.792086601257324, "scores": { "A": -7.997702598571777, "B": -10.789789199829102, "C": -12.683808326721191, "D": -10.730910301208496, "E": -9.873717308044434 } }, "control_time_shuffled": { "pred_label": "B", "correct": true, "margin": 0.42225170135498047, "scores": { "A": -9.392989158630371, "B": -8.97073745727539, "C": -10.319923400878906, "D": -10.757732391357422, "E": -9.409377098083496 } }, "control_shared_randvec": { "pred_label": "A", "correct": false, "margin": -2.0303096771240234, "scores": { "A": -10.767759323120117, "B": -12.79806900024414, "C": -13.778217315673828, "D": -12.801124572753906, "E": -11.79677963256836 } }, "control_patch_nonshared": { "pred_label": "A", "correct": false, "margin": -2.978278160095215, "scores": { "A": -9.31889533996582, "B": -12.297173500061035, "C": -13.513103485107422, "D": -12.114721298217773, "E": -11.16118049621582 } } } ] }