| { |
| "meta": { |
| "model": "meta-llama/Llama-2-7b-chat-hf", |
| "device": "cuda", |
| "dtype": "fp32", |
| "layer": 10, |
| "task": "aqua", |
| "eval_meta": { |
| "subspace_split": null, |
| "eval_split": "test", |
| "available_splits": [ |
| "train", |
| "test", |
| "validation" |
| ], |
| "hf_id": "aqua_rat", |
| "options_prefix_stripped": true, |
| "force_answer_prefix": true |
| }, |
| "candidate_labels": [ |
| "A", |
| "B", |
| "C", |
| "D", |
| "E" |
| ], |
| "candidate_text_style": "space_letter", |
| "candidate_token_lens": { |
| "A": 2, |
| "B": 2, |
| "C": 2, |
| "D": 2, |
| "E": 2 |
| }, |
| "max_candidate_token_len": 2, |
| "patch_windows": { |
| "steps_0": [ |
| 0 |
| ], |
| "steps_01": [ |
| 0, |
| 1 |
| ], |
| "full_steps": [ |
| 0, |
| 1 |
| ], |
| "note": "If steps_01 == full_steps then patched_01 == patched_full by design." |
| }, |
| "add_special_tokens_prompt": true, |
| "seed": 123, |
| "Qs_path": "Q_shared_layer10.npy", |
| "Qs_shape": [ |
| 4096, |
| 97 |
| ], |
| "n_scanned": 254, |
| "baseline_acc": 0.20866141732283464, |
| "baseline_correct_n": 53, |
| "ablated_acc": 0.2204724409448819, |
| "ablated_correct_n": 56, |
| "n_flips_total": 42, |
| "n_flips_used": 42, |
| "layers_path": "model.layers" |
| }, |
| "summary_on_flips": { |
| "patched_0": { |
| "n": 42, |
| "rescued": 31, |
| "rescued_pct": 73.80952380952381, |
| "mean_dmargin": 3.3110272657303583, |
| "median_dmargin": 3.0882368087768555 |
| }, |
| "patched_01": { |
| "n": 42, |
| "rescued": 42, |
| "rescued_pct": 100.0, |
| "mean_dmargin": 3.6945105280194963, |
| "median_dmargin": 3.317805767059326 |
| }, |
| "patched_full": { |
| "n": 42, |
| "rescued": 42, |
| "rescued_pct": 100.0, |
| "mean_dmargin": 3.6945105280194963, |
| "median_dmargin": 3.317805767059326 |
| }, |
| "control_rand_subspace": { |
| "n": 42, |
| "rescued": 2, |
| "rescued_pct": 4.761904761904762, |
| "mean_dmargin": 0.2848027887798491, |
| "median_dmargin": 0.27750468254089355 |
| }, |
| "control_shared_randvec": { |
| "n": 42, |
| "rescued": 2, |
| "rescued_pct": 4.761904761904762, |
| "mean_dmargin": 0.38392406418209984, |
| "median_dmargin": 0.4009871482849121 |
| }, |
| "control_time_shuffled": { |
| "n": 42, |
| "rescued": 32, |
| "rescued_pct": 76.19047619047619, |
| "mean_dmargin": 3.2988027050381614, |
| "median_dmargin": 3.1241049766540527 |
| }, |
| "control_patch_nonshared": { |
| "n": 42, |
| "rescued": 0, |
| "rescued_pct": 0.0, |
| "mean_dmargin": 9.08261253720238e-08, |
| "median_dmargin": 0.0 |
| } |
| }, |
| "scan_rows": [ |
| { |
| "ex_id": "aqua-test-0", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.5365619659423828, |
| "scores": { |
| "A": -9.953326225280762, |
| "B": -9.932822227478027, |
| "C": -9.396260261535645, |
| "D": -11.750316619873047, |
| "E": -11.375755310058594 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.246011734008789, |
| "scores": { |
| "A": -9.17184829711914, |
| "B": -10.41786003112793, |
| "C": -11.056268692016602, |
| "D": -11.206304550170898, |
| "E": -11.109382629394531 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-1", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.03298187255859375, |
| "scores": { |
| "A": -12.888943672180176, |
| "B": -11.506059646606445, |
| "C": -11.539041519165039, |
| "D": -13.514416694641113, |
| "E": -13.277742385864258 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.448361873626709, |
| "scores": { |
| "A": -6.55993127822876, |
| "B": -11.09766960144043, |
| "C": -12.008293151855469, |
| "D": -11.017435073852539, |
| "E": -12.980535507202148 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-2", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2555389404296875, |
| "scores": { |
| "A": -11.233211517333984, |
| "B": -10.210750579833984, |
| "C": -13.17569351196289, |
| "D": -12.437894821166992, |
| "E": -10.466289520263672 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.949008941650391, |
| "scores": { |
| "A": -6.06699275970459, |
| "B": -14.01600170135498, |
| "C": -17.137845993041992, |
| "D": -15.27363109588623, |
| "E": -15.64785099029541 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-3", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.1184234619140625, |
| "scores": { |
| "A": -12.673524856567383, |
| "B": -8.500896453857422, |
| "C": -9.619319915771484, |
| "D": -12.860542297363281, |
| "E": -15.163476943969727 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.0571308135986328, |
| "scores": { |
| "A": -9.761144638061523, |
| "B": -9.180427551269531, |
| "C": -10.237558364868164, |
| "D": -11.047746658325195, |
| "E": -10.9005126953125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-4", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.8914222717285156, |
| "scores": { |
| "A": -10.94589900970459, |
| "B": -11.837321281433105, |
| "C": -13.137775421142578, |
| "D": -12.691411972045898, |
| "E": -12.272680282592773 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.8114051818847656, |
| "scores": { |
| "A": -8.195756912231445, |
| "B": -10.760305404663086, |
| "C": -11.914056777954102, |
| "D": -10.007162094116211, |
| "E": -10.895750045776367 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-5", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953394889831543, |
| "scores": { |
| "A": -11.989723205566406, |
| "B": -10.97428035736084, |
| "C": -12.035185813903809, |
| "D": -11.961091041564941, |
| "E": -11.927675247192383 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.1877222061157227, |
| "scores": { |
| "A": -7.596570014953613, |
| "B": -9.784292221069336, |
| "C": -11.036355018615723, |
| "D": -9.200647354125977, |
| "E": -10.078826904296875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-6", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.3978919982910156, |
| "scores": { |
| "A": -10.834028244018555, |
| "B": -12.190977096557617, |
| "C": -13.23192024230957, |
| "D": -14.303913116455078, |
| "E": -12.20677375793457 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.510491371154785, |
| "scores": { |
| "A": -7.008818626403809, |
| "B": -9.866249084472656, |
| "C": -12.519309997558594, |
| "D": -11.021146774291992, |
| "E": -11.009967803955078 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-7", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.163623809814453, |
| "scores": { |
| "A": -10.552425384521484, |
| "B": -9.352998733520508, |
| "C": -8.957988739013672, |
| "D": -11.121612548828125, |
| "E": -10.957361221313477 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.8754777908325195, |
| "scores": { |
| "A": -6.72020149230957, |
| "B": -10.692610740661621, |
| "C": -10.54880428314209, |
| "D": -10.59567928314209, |
| "E": -12.170318603515625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-8", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.255800247192383, |
| "scores": { |
| "A": -13.03713607788086, |
| "B": -11.900215148925781, |
| "C": -14.156015396118164, |
| "D": -12.385900497436523, |
| "E": -14.474089622497559 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -8.033926010131836, |
| "scores": { |
| "A": -6.845177173614502, |
| "B": -11.928691864013672, |
| "C": -14.87910270690918, |
| "D": -11.124820709228516, |
| "E": -12.958259582519531 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-9", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.0833330154418945, |
| "scores": { |
| "A": -11.261035919189453, |
| "B": -8.873366355895996, |
| "C": -9.95669937133789, |
| "D": -12.33233642578125, |
| "E": -13.964797973632812 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.0129852294921875, |
| "scores": { |
| "A": -7.305376052856445, |
| "B": -11.318361282348633, |
| "C": -11.48718547821045, |
| "D": -13.66738224029541, |
| "E": -15.269938468933105 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-10", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.20740604400634766, |
| "scores": { |
| "A": -11.915353775024414, |
| "B": -10.265600204467773, |
| "C": -13.313862800598145, |
| "D": -11.45443344116211, |
| "E": -10.473006248474121 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.0987701416015625, |
| "scores": { |
| "A": -7.162988662719727, |
| "B": -11.00853157043457, |
| "C": -14.745489120483398, |
| "D": -12.574932098388672, |
| "E": -14.261758804321289 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-11", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.032149314880371, |
| "scores": { |
| "A": -11.705740928649902, |
| "B": -10.383201599121094, |
| "C": -12.561548233032227, |
| "D": -12.598165512084961, |
| "E": -12.415350914001465 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.6796650886535645, |
| "scores": { |
| "A": -6.977108478546143, |
| "B": -10.32332992553711, |
| "C": -13.413308143615723, |
| "D": -11.12005615234375, |
| "E": -13.656773567199707 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-12", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.4920692443847656, |
| "scores": { |
| "A": -12.802289962768555, |
| "B": -9.024707794189453, |
| "C": -11.516777038574219, |
| "D": -11.572513580322266, |
| "E": -13.012077331542969 |
| } |
| }, |
| "ablated": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -3.181248664855957, |
| "scores": { |
| "A": -8.80724048614502, |
| "B": -9.62839126586914, |
| "C": -11.967851638793945, |
| "D": -8.786602973937988, |
| "E": -11.410276412963867 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-13", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.793048858642578, |
| "scores": { |
| "A": -12.858366012573242, |
| "B": -9.46006965637207, |
| "C": -9.54768180847168, |
| "D": -12.253118515014648, |
| "E": -12.870738983154297 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.821455955505371, |
| "scores": { |
| "A": -6.2456769943237305, |
| "B": -9.579475402832031, |
| "C": -8.059391021728516, |
| "D": -10.067132949829102, |
| "E": -13.029922485351562 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-14", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.6211061477661133, |
| "scores": { |
| "A": -12.599227905273438, |
| "B": -9.088578224182129, |
| "C": -10.55274772644043, |
| "D": -10.709684371948242, |
| "E": -9.514959335327148 |
| } |
| }, |
| "ablated": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.46077728271484375, |
| "scores": { |
| "A": -7.140524864196777, |
| "B": -8.438863754272461, |
| "C": -9.106466293334961, |
| "D": -6.679747581481934, |
| "E": -8.679572105407715 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-15", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6604747772216797, |
| "scores": { |
| "A": -11.07632064819336, |
| "B": -10.41584587097168, |
| "C": -13.610551834106445, |
| "D": -15.297096252441406, |
| "E": -13.782489776611328 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.743229389190674, |
| "scores": { |
| "A": -6.1119704246521, |
| "B": -10.855199813842773, |
| "C": -11.251523971557617, |
| "D": -11.053302764892578, |
| "E": -13.566537857055664 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-16", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 2.796067237854004, |
| "scores": { |
| "A": -12.479905128479004, |
| "B": -10.507231712341309, |
| "C": -7.711164474487305, |
| "D": -12.827747344970703, |
| "E": -12.807977676391602 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.861086368560791, |
| "scores": { |
| "A": -7.834758281707764, |
| "B": -9.467061996459961, |
| "C": -8.695844650268555, |
| "D": -9.597942352294922, |
| "E": -11.696287155151367 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-17", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.0560503005981445, |
| "scores": { |
| "A": -11.58172607421875, |
| "B": -10.525675773620605, |
| "C": -12.54155158996582, |
| "D": -12.84415054321289, |
| "E": -12.90414810180664 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 4.481626510620117, |
| "scores": { |
| "A": -7.197931289672852, |
| "B": -12.279987335205078, |
| "C": -14.135135650634766, |
| "D": -11.679557800292969, |
| "E": -15.510787963867188 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-18", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.3985824584960938, |
| "scores": { |
| "A": -11.516077041625977, |
| "B": -10.966100692749023, |
| "C": -13.956039428710938, |
| "D": -12.364683151245117, |
| "E": -12.156122207641602 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.602712631225586, |
| "scores": { |
| "A": -7.199901580810547, |
| "B": -10.85714340209961, |
| "C": -14.79636001586914, |
| "D": -11.802614212036133, |
| "E": -15.477705001831055 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-19", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.7771091461181641, |
| "scores": { |
| "A": -9.50829792022705, |
| "B": -9.048941612243652, |
| "C": -8.731188774108887, |
| "D": -10.535305976867676, |
| "E": -11.122632026672363 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 3.3860559463500977, |
| "scores": { |
| "A": -5.540165901184082, |
| "B": -9.812980651855469, |
| "C": -8.92622184753418, |
| "D": -10.782073974609375, |
| "E": -11.889626502990723 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-20", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.8421554565429688, |
| "scores": { |
| "A": -10.2200345993042, |
| "B": -9.71203899383545, |
| "C": -9.828819274902344, |
| "D": -11.333096504211426, |
| "E": -11.554194450378418 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.804967880249023, |
| "scores": { |
| "A": -6.537824630737305, |
| "B": -9.35693645477295, |
| "C": -12.991019248962402, |
| "D": -10.14554500579834, |
| "E": -14.342792510986328 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-21", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7691888809204102, |
| "scores": { |
| "A": -10.39490795135498, |
| "B": -9.62571907043457, |
| "C": -12.538268089294434, |
| "D": -12.220020294189453, |
| "E": -11.351235389709473 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.5374608039855957, |
| "scores": { |
| "A": -6.129680156707764, |
| "B": -9.66714096069336, |
| "C": -10.800978660583496, |
| "D": -10.67288875579834, |
| "E": -11.187762260437012 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-22", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.974935531616211, |
| "scores": { |
| "A": -9.757966995239258, |
| "B": -8.531810760498047, |
| "C": -11.770942687988281, |
| "D": -12.615520477294922, |
| "E": -11.506746292114258 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.905245304107666, |
| "scores": { |
| "A": -6.365554332733154, |
| "B": -10.006322860717773, |
| "C": -12.58491325378418, |
| "D": -13.247224807739258, |
| "E": -14.27079963684082 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-23", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.1230697631835938, |
| "scores": { |
| "A": -9.921218872070312, |
| "B": -8.795772552490234, |
| "C": -10.068075180053711, |
| "D": -11.918842315673828, |
| "E": -10.69045639038086 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.665317535400391, |
| "scores": { |
| "A": -6.0819854736328125, |
| "B": -10.304386138916016, |
| "C": -12.034563064575195, |
| "D": -12.747303009033203, |
| "E": -13.464553833007812 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-24", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.5330438613891602, |
| "scores": { |
| "A": -11.630638122558594, |
| "B": -10.715802192687988, |
| "C": -13.269601821899414, |
| "D": -11.420013427734375, |
| "E": -11.248846054077148 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.1090230941772461, |
| "scores": { |
| "A": -7.273715019226074, |
| "B": -7.602567672729492, |
| "C": -9.413043975830078, |
| "D": -7.838529586791992, |
| "E": -7.38273811340332 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-25", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.06520843505859375, |
| "scores": { |
| "A": -12.949111938476562, |
| "B": -12.246522903442383, |
| "C": -12.181314468383789, |
| "D": -12.397541046142578, |
| "E": -13.614669799804688 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.5791339874267578, |
| "scores": { |
| "A": -8.643856048583984, |
| "B": -10.894746780395508, |
| "C": -10.222990036010742, |
| "D": -9.472063064575195, |
| "E": -10.86764144897461 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-26", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -3.831747055053711, |
| "scores": { |
| "A": -12.59067440032959, |
| "B": -10.750125885009766, |
| "C": -8.758927345275879, |
| "D": -12.953798294067383, |
| "E": -10.869454383850098 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.6270813941955566, |
| "scores": { |
| "A": -7.691537380218506, |
| "B": -10.792871475219727, |
| "C": -8.318618774414062, |
| "D": -9.645444869995117, |
| "E": -11.4267578125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-27", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.8056774139404297, |
| "scores": { |
| "A": -10.92805290222168, |
| "B": -9.627052307128906, |
| "C": -10.676057815551758, |
| "D": -12.432729721069336, |
| "E": -12.006237030029297 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.3288869857788086, |
| "scores": { |
| "A": -8.168229103088379, |
| "B": -10.158320426940918, |
| "C": -11.378348350524902, |
| "D": -10.497116088867188, |
| "E": -11.294864654541016 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-28", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.3396778106689453, |
| "scores": { |
| "A": -10.445246696472168, |
| "B": -11.784924507141113, |
| "C": -12.745423316955566, |
| "D": -13.060553550720215, |
| "E": -13.08572769165039 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 4.244240760803223, |
| "scores": { |
| "A": -9.513802528381348, |
| "B": -13.75804328918457, |
| "C": -14.599481582641602, |
| "D": -14.581493377685547, |
| "E": -13.772315979003906 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-29", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.6846466064453125, |
| "scores": { |
| "A": -12.935235977172852, |
| "B": -10.250589370727539, |
| "C": -10.775646209716797, |
| "D": -10.991680145263672, |
| "E": -12.39478874206543 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.3767662048339844, |
| "scores": { |
| "A": -8.437541961669922, |
| "B": -10.61314868927002, |
| "C": -11.896113395690918, |
| "D": -9.814308166503906, |
| "E": -11.01830005645752 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-30", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -4.338037490844727, |
| "scores": { |
| "A": -11.664570808410645, |
| "B": -11.412707328796387, |
| "C": -11.081539154052734, |
| "D": -15.419576644897461, |
| "E": -11.825617790222168 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.817980766296387, |
| "scores": { |
| "A": -6.548068046569824, |
| "B": -12.580531120300293, |
| "C": -14.352285385131836, |
| "D": -13.366048812866211, |
| "E": -12.250596046447754 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-31", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.5094156265258789, |
| "scores": { |
| "A": -10.960805892944336, |
| "B": -9.841753005981445, |
| "C": -10.351168632507324, |
| "D": -10.83080768585205, |
| "E": -11.36546802520752 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.294549942016602, |
| "scores": { |
| "A": -6.641495704650879, |
| "B": -10.112987518310547, |
| "C": -11.93604564666748, |
| "D": -10.474628448486328, |
| "E": -9.680524826049805 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-32", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.4654970169067383, |
| "scores": { |
| "A": -12.72982406616211, |
| "B": -10.999740600585938, |
| "C": -10.5342435836792, |
| "D": -12.491869926452637, |
| "E": -11.609811782836914 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.625300884246826, |
| "scores": { |
| "A": -6.746448993682861, |
| "B": -10.371749877929688, |
| "C": -10.17892074584961, |
| "D": -12.78900146484375, |
| "E": -13.765708923339844 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-33", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.2154502868652344, |
| "scores": { |
| "A": -17.279247283935547, |
| "B": -18.187232971191406, |
| "C": -16.063796997070312, |
| "D": -19.143869400024414, |
| "E": -19.470874786376953 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.4487724304199219, |
| "scores": { |
| "A": -9.145519256591797, |
| "B": -10.157659530639648, |
| "C": -9.594291687011719, |
| "D": -10.095281600952148, |
| "E": -10.523807525634766 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-34", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.934541702270508, |
| "scores": { |
| "A": -14.833627700805664, |
| "B": -11.353015899658203, |
| "C": -14.715003967285156, |
| "D": -14.287557601928711, |
| "E": -15.349594116210938 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.323911666870117, |
| "scores": { |
| "A": -9.540066719055176, |
| "B": -11.480504035949707, |
| "C": -14.825972557067871, |
| "D": -14.863978385925293, |
| "E": -18.370067596435547 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-35", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.0334539413452148, |
| "scores": { |
| "A": -11.940654754638672, |
| "B": -10.587756156921387, |
| "C": -11.621210098266602, |
| "D": -11.252909660339355, |
| "E": -11.599471092224121 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.0078792572021484, |
| "scores": { |
| "A": -8.075170516967773, |
| "B": -10.238532066345215, |
| "C": -10.083049774169922, |
| "D": -9.30896282196045, |
| "E": -9.410977363586426 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-36", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.7479333877563477, |
| "scores": { |
| "A": -9.8831205368042, |
| "B": -10.474870681762695, |
| "C": -10.734328269958496, |
| "D": -12.681618690490723, |
| "E": -11.631053924560547 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.574769020080566, |
| "scores": { |
| "A": -8.31888198852539, |
| "B": -12.751762390136719, |
| "C": -12.679941177368164, |
| "D": -14.572219848632812, |
| "E": -14.893651008605957 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-37", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.9962596893310547, |
| "scores": { |
| "A": -11.241470336914062, |
| "B": -10.501060485839844, |
| "C": -13.315231323242188, |
| "D": -12.760457992553711, |
| "E": -11.497320175170898 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.8308868408203125, |
| "scores": { |
| "A": -9.542606353759766, |
| "B": -10.720484733581543, |
| "C": -12.237710952758789, |
| "D": -10.93893814086914, |
| "E": -10.373493194580078 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-38", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.5876941680908203, |
| "scores": { |
| "A": -13.907208442687988, |
| "B": -12.088470458984375, |
| "C": -17.00652313232422, |
| "D": -16.789785385131836, |
| "E": -13.676164627075195 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.61981201171875, |
| "scores": { |
| "A": -8.97873306274414, |
| "B": -11.468414306640625, |
| "C": -14.408849716186523, |
| "D": -12.497419357299805, |
| "E": -12.59854507446289 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-39", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.704728126525879, |
| "scores": { |
| "A": -10.207995414733887, |
| "B": -11.912723541259766, |
| "C": -12.109935760498047, |
| "D": -14.276583671569824, |
| "E": -13.992156982421875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.4076976776123047, |
| "scores": { |
| "A": -10.194977760314941, |
| "B": -10.153923988342285, |
| "C": -9.787280082702637, |
| "D": -11.554168701171875, |
| "E": -10.806174278259277 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-40", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.1876583099365234, |
| "scores": { |
| "A": -9.934663772583008, |
| "B": -8.747005462646484, |
| "C": -8.951501846313477, |
| "D": -11.43099308013916, |
| "E": -10.340058326721191 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 2.8720083236694336, |
| "scores": { |
| "A": -6.977086067199707, |
| "B": -10.881498336791992, |
| "C": -10.488456726074219, |
| "D": -9.84909439086914, |
| "E": -9.965597152709961 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-41", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.4858741760253906, |
| "scores": { |
| "A": -13.249530792236328, |
| "B": -12.764188766479492, |
| "C": -14.671175003051758, |
| "D": -16.250062942504883, |
| "E": -16.50116539001465 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.068471908569336, |
| "scores": { |
| "A": -8.645910263061523, |
| "B": -9.370107650756836, |
| "C": -10.64356803894043, |
| "D": -10.71438217163086, |
| "E": -10.385175704956055 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-42", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.9857778549194336, |
| "scores": { |
| "A": -14.824063301086426, |
| "B": -12.47767448425293, |
| "C": -12.883535385131836, |
| "D": -13.463452339172363, |
| "E": -14.847708702087402 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.4033865928649902, |
| "scores": { |
| "A": -6.698444843292236, |
| "B": -11.148558616638184, |
| "C": -12.872434616088867, |
| "D": -10.101831436157227, |
| "E": -12.350337982177734 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-43", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.739567756652832, |
| "scores": { |
| "A": -12.141595840454102, |
| "B": -11.716957092285156, |
| "C": -11.391490936279297, |
| "D": -12.131058692932129, |
| "E": -13.988408088684082 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.8107433319091797, |
| "scores": { |
| "A": -5.002326965332031, |
| "B": -7.882379531860352, |
| "C": -9.065218925476074, |
| "D": -6.813070297241211, |
| "E": -9.598858833312988 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-44", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.3987007141113281, |
| "scores": { |
| "A": -10.112314224243164, |
| "B": -10.511014938354492, |
| "C": -10.519290924072266, |
| "D": -12.189737319946289, |
| "E": -15.004023551940918 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 3.4644508361816406, |
| "scores": { |
| "A": -7.429119110107422, |
| "B": -11.428091049194336, |
| "C": -14.222383499145508, |
| "D": -10.893569946289062, |
| "E": -14.403785705566406 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-45", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -1.0466318130493164, |
| "scores": { |
| "A": -12.969017028808594, |
| "B": -12.31945514678955, |
| "C": -11.922385215759277, |
| "D": -12.34321403503418, |
| "E": -12.47985553741455 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 2.317699909210205, |
| "scores": { |
| "A": -7.05304479598999, |
| "B": -9.370744705200195, |
| "C": -10.5771484375, |
| "D": -9.985260009765625, |
| "E": -12.94332504272461 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-46", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.7821559906005859, |
| "scores": { |
| "A": -12.967859268188477, |
| "B": -11.404945373535156, |
| "C": -11.63718032836914, |
| "D": -11.908271789550781, |
| "E": -12.187101364135742 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.7385272979736328, |
| "scores": { |
| "A": -7.029201507568359, |
| "B": -8.36732292175293, |
| "C": -10.496192932128906, |
| "D": -8.434120178222656, |
| "E": -8.767728805541992 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-47", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.20550537109375, |
| "scores": { |
| "A": -11.954267501831055, |
| "B": -12.503751754760742, |
| "C": -12.114371299743652, |
| "D": -13.045472145080566, |
| "E": -11.748762130737305 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.0157623291015625, |
| "scores": { |
| "A": -9.386420249938965, |
| "B": -11.835212707519531, |
| "C": -13.338075637817383, |
| "D": -12.148918151855469, |
| "E": -13.402182579040527 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-48", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.2838096618652344, |
| "scores": { |
| "A": -14.12697982788086, |
| "B": -14.659561157226562, |
| "C": -9.08004379272461, |
| "D": -10.326372146606445, |
| "E": -11.363853454589844 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -1.4984474182128906, |
| "scores": { |
| "A": -7.6805315017700195, |
| "B": -8.929984092712402, |
| "C": -6.419025421142578, |
| "D": -7.822979927062988, |
| "E": -7.917472839355469 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-49", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -4.129861831665039, |
| "scores": { |
| "A": -15.956474304199219, |
| "B": -11.82661247253418, |
| "C": -14.917438507080078, |
| "D": -13.226446151733398, |
| "E": -13.977205276489258 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.0888185501098633, |
| "scores": { |
| "A": -8.945561408996582, |
| "B": -10.034379959106445, |
| "C": -13.530162811279297, |
| "D": -10.250844955444336, |
| "E": -12.504024505615234 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-50", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.279160499572754, |
| "scores": { |
| "A": -9.496601104736328, |
| "B": -8.565024375915527, |
| "C": -9.976577758789062, |
| "D": -10.844184875488281, |
| "E": -10.707534790039062 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.749849319458008, |
| "scores": { |
| "A": -7.051667213439941, |
| "B": -10.064657211303711, |
| "C": -12.560236930847168, |
| "D": -10.80151653289795, |
| "E": -13.33292007446289 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-51", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.252643585205078, |
| "scores": { |
| "A": -11.461423873901367, |
| "B": -9.536659240722656, |
| "C": -9.648260116577148, |
| "D": -12.789302825927734, |
| "E": -11.993169784545898 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.026073455810547, |
| "scores": { |
| "A": -5.237083435058594, |
| "B": -8.581623077392578, |
| "C": -12.21021842956543, |
| "D": -10.26315689086914, |
| "E": -11.514408111572266 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-52", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.04578971862792969, |
| "scores": { |
| "A": -12.882274627685547, |
| "B": -9.855215072631836, |
| "C": -9.901004791259766, |
| "D": -11.499755859375, |
| "E": -10.678110122680664 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.4922494888305664, |
| "scores": { |
| "A": -5.136632442474365, |
| "B": -5.628881931304932, |
| "C": -6.605200290679932, |
| "D": -6.88695764541626, |
| "E": -6.429419994354248 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-53", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -4.019079208374023, |
| "scores": { |
| "A": -12.729389190673828, |
| "B": -12.427694320678711, |
| "C": -8.400447845458984, |
| "D": -12.419527053833008, |
| "E": -13.457754135131836 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.768967628479004, |
| "scores": { |
| "A": -4.923480033874512, |
| "B": -8.778578758239746, |
| "C": -8.191584587097168, |
| "D": -9.692447662353516, |
| "E": -10.092605590820312 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-54", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.2229089736938477, |
| "scores": { |
| "A": -12.411125183105469, |
| "B": -12.760860443115234, |
| "C": -11.302736282348633, |
| "D": -13.52564525604248, |
| "E": -11.65049934387207 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.5538482666015625, |
| "scores": { |
| "A": -7.524580955505371, |
| "B": -12.95750904083252, |
| "C": -11.718106269836426, |
| "D": -13.078429222106934, |
| "E": -12.335714340209961 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-55", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.8632240295410156, |
| "scores": { |
| "A": -10.496152877807617, |
| "B": -10.564685821533203, |
| "C": -9.701461791992188, |
| "D": -13.170589447021484, |
| "E": -11.492547988891602 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.287240982055664, |
| "scores": { |
| "A": -8.019121170043945, |
| "B": -10.30636215209961, |
| "C": -11.232714653015137, |
| "D": -11.299230575561523, |
| "E": -13.430822372436523 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-56", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -1.5651264190673828, |
| "scores": { |
| "A": -10.601175308227539, |
| "B": -11.313573837280273, |
| "C": -12.363874435424805, |
| "D": -12.14034652709961, |
| "E": -10.575220108032227 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.2690534591674805, |
| "scores": { |
| "A": -7.066210746765137, |
| "B": -8.967557907104492, |
| "C": -10.526098251342773, |
| "D": -8.335264205932617, |
| "E": -9.71631145477295 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-57", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.14557647705078125, |
| "scores": { |
| "A": -13.898555755615234, |
| "B": -12.992910385131836, |
| "C": -14.371723175048828, |
| "D": -14.158893585205078, |
| "E": -13.138486862182617 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.6548147201538086, |
| "scores": { |
| "A": -10.324930191040039, |
| "B": -12.979744911193848, |
| "C": -12.848653793334961, |
| "D": -12.86312484741211, |
| "E": -12.547582626342773 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-58", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.41909122467041, |
| "scores": { |
| "A": -15.519378662109375, |
| "B": -10.206266403198242, |
| "C": -13.625357627868652, |
| "D": -15.576879501342773, |
| "E": -14.738330841064453 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -6.554704189300537, |
| "scores": { |
| "A": -10.922317504882812, |
| "B": -7.338093280792236, |
| "C": -13.892797470092773, |
| "D": -11.01749038696289, |
| "E": -13.093095779418945 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-59", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.0286178588867188, |
| "scores": { |
| "A": -9.556885719299316, |
| "B": -8.528267860412598, |
| "C": -9.387777328491211, |
| "D": -11.924543380737305, |
| "E": -12.156147003173828 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 2.6055479049682617, |
| "scores": { |
| "A": -7.506214141845703, |
| "B": -10.111762046813965, |
| "C": -10.535852432250977, |
| "D": -12.113842010498047, |
| "E": -13.108339309692383 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-60", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -4.278145790100098, |
| "scores": { |
| "A": -9.984968185424805, |
| "B": -9.964075088500977, |
| "C": -10.470348358154297, |
| "D": -14.242220878601074, |
| "E": -14.801360130310059 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.694246292114258, |
| "scores": { |
| "A": -5.662052154541016, |
| "B": -9.853955268859863, |
| "C": -11.739667892456055, |
| "D": -12.356298446655273, |
| "E": -13.508790016174316 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-61", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.4795856475830078, |
| "scores": { |
| "A": -11.702659606933594, |
| "B": -13.152563095092773, |
| "C": -15.516944885253906, |
| "D": -12.736021041870117, |
| "E": -13.182245254516602 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.2732534408569336, |
| "scores": { |
| "A": -7.3774213790893555, |
| "B": -8.904997825622559, |
| "C": -12.49362564086914, |
| "D": -8.682700157165527, |
| "E": -9.650674819946289 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-62", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -4.083756446838379, |
| "scores": { |
| "A": -12.819320678710938, |
| "B": -9.86478042602539, |
| "C": -8.735564231872559, |
| "D": -13.259029388427734, |
| "E": -14.102011680603027 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 2.9089107513427734, |
| "scores": { |
| "A": -5.8731231689453125, |
| "B": -8.782033920288086, |
| "C": -8.930816650390625, |
| "D": -11.697149276733398, |
| "E": -13.882165908813477 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-63", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.828573226928711, |
| "scores": { |
| "A": -12.999687194824219, |
| "B": -11.171113967895508, |
| "C": -12.38466739654541, |
| "D": -15.068181991577148, |
| "E": -14.821438789367676 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 6.530241966247559, |
| "scores": { |
| "A": -9.71985149383545, |
| "B": -16.808002471923828, |
| "C": -17.539220809936523, |
| "D": -16.250093460083008, |
| "E": -17.91951560974121 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-64", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.11572933197021484, |
| "scores": { |
| "A": -11.253127098083496, |
| "B": -9.855234146118164, |
| "C": -9.970963478088379, |
| "D": -11.471985816955566, |
| "E": -13.291877746582031 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.368386745452881, |
| "scores": { |
| "A": -7.864134311676025, |
| "B": -8.514406204223633, |
| "C": -10.232521057128906, |
| "D": -9.923612594604492, |
| "E": -10.108715057373047 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-65", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.8041362762451172, |
| "scores": { |
| "A": -11.474063873291016, |
| "B": -10.669927597045898, |
| "C": -11.573220252990723, |
| "D": -11.306943893432617, |
| "E": -12.886905670166016 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 2.3487911224365234, |
| "scores": { |
| "A": -8.487371444702148, |
| "B": -10.836162567138672, |
| "C": -13.451092720031738, |
| "D": -12.694389343261719, |
| "E": -12.682896614074707 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-66", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.141798973083496, |
| "scores": { |
| "A": -12.508940696716309, |
| "B": -10.861954689025879, |
| "C": -12.234925270080566, |
| "D": -11.884855270385742, |
| "E": -13.003753662109375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.2710695266723633, |
| "scores": { |
| "A": -8.62102222442627, |
| "B": -10.894733428955078, |
| "C": -14.39864730834961, |
| "D": -10.672046661376953, |
| "E": -11.892091751098633 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-67", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.341836929321289, |
| "scores": { |
| "A": -8.820962905883789, |
| "B": -8.544965744018555, |
| "C": -9.699121475219727, |
| "D": -9.886802673339844, |
| "E": -10.276521682739258 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.691999435424805, |
| "scores": { |
| "A": -6.632804870605469, |
| "B": -11.282808303833008, |
| "C": -13.191905975341797, |
| "D": -11.324804306030273, |
| "E": -13.604455947875977 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-68", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6945219039916992, |
| "scores": { |
| "A": -11.65401840209961, |
| "B": -10.95949649810791, |
| "C": -11.869510650634766, |
| "D": -12.070514678955078, |
| "E": -12.618841171264648 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.171473503112793, |
| "scores": { |
| "A": -9.029966354370117, |
| "B": -11.20143985748291, |
| "C": -11.244144439697266, |
| "D": -11.500038146972656, |
| "E": -10.598958015441895 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-69", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.48809242248535156, |
| "scores": { |
| "A": -12.742959976196289, |
| "B": -10.583757400512695, |
| "C": -11.071849822998047, |
| "D": -14.073648452758789, |
| "E": -13.576339721679688 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.911890983581543, |
| "scores": { |
| "A": -6.22026252746582, |
| "B": -9.122340202331543, |
| "C": -13.132153511047363, |
| "D": -11.907660484313965, |
| "E": -11.635204315185547 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-70", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.720950126647949, |
| "scores": { |
| "A": -12.35704231262207, |
| "B": -8.636092185974121, |
| "C": -13.832864761352539, |
| "D": -10.753250122070312, |
| "E": -13.392253875732422 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.03596019744873047, |
| "scores": { |
| "A": -7.090466499328613, |
| "B": -7.054506301879883, |
| "C": -11.16434097290039, |
| "D": -7.422432899475098, |
| "E": -10.723372459411621 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-71", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -3.9103527069091797, |
| "scores": { |
| "A": -14.14399528503418, |
| "B": -15.403404235839844, |
| "C": -10.233642578125, |
| "D": -13.802553176879883, |
| "E": -11.533794403076172 |
| } |
| }, |
| "ablated": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -0.41689586639404297, |
| "scores": { |
| "A": -8.586220741271973, |
| "B": -12.792287826538086, |
| "C": -9.270968437194824, |
| "D": -9.66348648071289, |
| "E": -8.16932487487793 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-72", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.3486766815185547, |
| "scores": { |
| "A": -14.158398628234863, |
| "B": -12.339851379394531, |
| "C": -13.015729904174805, |
| "D": -13.451315879821777, |
| "E": -13.688528060913086 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.4561805725097656, |
| "scores": { |
| "A": -8.697513580322266, |
| "B": -9.179859161376953, |
| "C": -10.927139282226562, |
| "D": -9.569753646850586, |
| "E": -10.153694152832031 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-73", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -6.728538513183594, |
| "scores": { |
| "A": -15.75730037689209, |
| "B": -13.720550537109375, |
| "C": -10.241283416748047, |
| "D": -14.861220359802246, |
| "E": -16.96982192993164 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -4.603492736816406, |
| "scores": { |
| "A": -7.970151901245117, |
| "B": -8.63032341003418, |
| "C": -5.910724639892578, |
| "D": -10.074857711791992, |
| "E": -10.514217376708984 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-74", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -1.4039888381958008, |
| "scores": { |
| "A": -12.737687110900879, |
| "B": -11.68657112121582, |
| "C": -11.333698272705078, |
| "D": -13.284832954406738, |
| "E": -14.690400123596191 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.2767963409423828, |
| "scores": { |
| "A": -7.744673728942871, |
| "B": -9.163191795349121, |
| "C": -8.021470069885254, |
| "D": -8.26796817779541, |
| "E": -8.809639930725098 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-75", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.457437515258789, |
| "scores": { |
| "A": -12.093403816223145, |
| "B": -8.635966300964355, |
| "C": -10.58320140838623, |
| "D": -12.374037742614746, |
| "E": -13.680496215820312 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 4.63665771484375, |
| "scores": { |
| "A": -6.7574872970581055, |
| "B": -11.394145011901855, |
| "C": -12.999401092529297, |
| "D": -11.796443939208984, |
| "E": -13.318641662597656 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-76", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.5669078826904297, |
| "scores": { |
| "A": -13.41685676574707, |
| "B": -11.04054069519043, |
| "C": -11.60744857788086, |
| "D": -15.16108512878418, |
| "E": -14.487443923950195 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.9996509552001953, |
| "scores": { |
| "A": -10.679261207580566, |
| "B": -12.45645523071289, |
| "C": -12.678912162780762, |
| "D": -12.86469554901123, |
| "E": -14.037067413330078 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-77", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.8945484161376953, |
| "scores": { |
| "A": -12.302556991577148, |
| "B": -10.522138595581055, |
| "C": -9.23642349243164, |
| "D": -12.130971908569336, |
| "E": -14.117457389831543 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.082538604736328, |
| "scores": { |
| "A": -6.24571418762207, |
| "B": -8.398621559143066, |
| "C": -11.599692344665527, |
| "D": -11.328252792358398, |
| "E": -12.078139305114746 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-78", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.0808143615722656, |
| "scores": { |
| "A": -12.794174194335938, |
| "B": -8.323003768920898, |
| "C": -11.403818130493164, |
| "D": -13.768218994140625, |
| "E": -13.847496032714844 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.647706031799316, |
| "scores": { |
| "A": -5.634004592895508, |
| "B": -10.281710624694824, |
| "C": -11.297346115112305, |
| "D": -12.075166702270508, |
| "E": -12.413890838623047 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-79", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.665989875793457, |
| "scores": { |
| "A": -11.933293342590332, |
| "B": -11.267303466796875, |
| "C": -12.420202255249023, |
| "D": -11.657835006713867, |
| "E": -12.877152442932129 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.082460880279541, |
| "scores": { |
| "A": -7.030532360076904, |
| "B": -8.325664520263672, |
| "C": -10.181509017944336, |
| "D": -8.112993240356445, |
| "E": -9.957942962646484 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-80", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.5075702667236328, |
| "scores": { |
| "A": -10.707103729248047, |
| "B": -9.199533462524414, |
| "C": -10.327856063842773, |
| "D": -10.43326187133789, |
| "E": -11.281829833984375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.147303581237793, |
| "scores": { |
| "A": -5.987746238708496, |
| "B": -7.135049819946289, |
| "C": -8.613941192626953, |
| "D": -7.509088516235352, |
| "E": -7.937631607055664 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-81", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.7157487869262695, |
| "scores": { |
| "A": -12.188438415527344, |
| "B": -11.99388599395752, |
| "C": -10.619071006774902, |
| "D": -14.215484619140625, |
| "E": -13.334819793701172 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.729328155517578, |
| "scores": { |
| "A": -10.309041976928711, |
| "B": -13.062793731689453, |
| "C": -12.420219421386719, |
| "D": -12.16856575012207, |
| "E": -13.038370132446289 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-82", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.778050422668457, |
| "scores": { |
| "A": -12.289058685302734, |
| "B": -9.830384254455566, |
| "C": -12.608434677124023, |
| "D": -13.991266250610352, |
| "E": -11.78373908996582 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.322303771972656, |
| "scores": { |
| "A": -6.604994773864746, |
| "B": -9.52199649810791, |
| "C": -10.927298545837402, |
| "D": -10.680765151977539, |
| "E": -9.880135536193848 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-83", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -1.9063081741333008, |
| "scores": { |
| "A": -10.316740036010742, |
| "B": -9.180550575256348, |
| "C": -8.257037162780762, |
| "D": -10.163345336914062, |
| "E": -9.424388885498047 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.0506300926208496, |
| "scores": { |
| "A": -5.58945894241333, |
| "B": -7.652187824249268, |
| "C": -9.419204711914062, |
| "D": -8.64008903503418, |
| "E": -10.067176818847656 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-84", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.371249198913574, |
| "scores": { |
| "A": -11.472586631774902, |
| "B": -10.989583015441895, |
| "C": -9.101337432861328, |
| "D": -9.797515869140625, |
| "E": -10.398811340332031 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.0994462966918945, |
| "scores": { |
| "A": -9.702713012695312, |
| "B": -11.570058822631836, |
| "C": -11.636595726013184, |
| "D": -10.802159309387207, |
| "E": -11.598857879638672 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-85", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.0057201385498047, |
| "scores": { |
| "A": -11.025768280029297, |
| "B": -9.936981201171875, |
| "C": -9.020048141479492, |
| "D": -13.46237564086914, |
| "E": -13.570629119873047 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 3.6366686820983887, |
| "scores": { |
| "A": -6.244076251983643, |
| "B": -10.921756744384766, |
| "C": -9.880744934082031, |
| "D": -11.773923873901367, |
| "E": -14.056009292602539 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-86", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.7587852478027344, |
| "scores": { |
| "A": -10.227313995361328, |
| "B": -8.468528747558594, |
| "C": -10.537178039550781, |
| "D": -12.026582717895508, |
| "E": -11.197158813476562 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.7784309387207031, |
| "scores": { |
| "A": -9.244726181030273, |
| "B": -11.023157119750977, |
| "C": -12.277462005615234, |
| "D": -12.193278312683105, |
| "E": -11.235006332397461 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-87", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6314544677734375, |
| "scores": { |
| "A": -9.793952941894531, |
| "B": -9.162498474121094, |
| "C": -11.231021881103516, |
| "D": -12.002910614013672, |
| "E": -11.467964172363281 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.9217519760131836, |
| "scores": { |
| "A": -6.585877418518066, |
| "B": -9.50762939453125, |
| "C": -9.712257385253906, |
| "D": -9.212251663208008, |
| "E": -11.261186599731445 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-88", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.4125795364379883, |
| "scores": { |
| "A": -14.69003963470459, |
| "B": -12.324016571044922, |
| "C": -13.061227798461914, |
| "D": -13.971894264221191, |
| "E": -13.73659610748291 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.924210071563721, |
| "scores": { |
| "A": -5.407630443572998, |
| "B": -6.251180171966553, |
| "C": -7.470930576324463, |
| "D": -9.033124923706055, |
| "E": -10.331840515136719 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-89", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.028618812561035, |
| "scores": { |
| "A": -10.8992919921875, |
| "B": -7.870673179626465, |
| "C": -10.651062965393066, |
| "D": -12.425169944763184, |
| "E": -11.295161247253418 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.749924659729004, |
| "scores": { |
| "A": -6.359846115112305, |
| "B": -8.109770774841309, |
| "C": -11.431536674499512, |
| "D": -9.726787567138672, |
| "E": -9.291827201843262 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-90", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.4867076873779297, |
| "scores": { |
| "A": -11.009735107421875, |
| "B": -10.518085479736328, |
| "C": -12.004793167114258, |
| "D": -12.105035781860352, |
| "E": -13.80916976928711 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.5990352630615234, |
| "scores": { |
| "A": -8.198083877563477, |
| "B": -9.129544258117676, |
| "C": -10.797119140625, |
| "D": -9.568111419677734, |
| "E": -10.094569206237793 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-91", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.3843660354614258, |
| "scores": { |
| "A": -12.006914138793945, |
| "B": -10.0424222946167, |
| "C": -11.426788330078125, |
| "D": -12.127811431884766, |
| "E": -10.665849685668945 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.6309232711791992, |
| "scores": { |
| "A": -8.031240463256836, |
| "B": -8.961100578308105, |
| "C": -9.662163734436035, |
| "D": -9.757013320922852, |
| "E": -9.014056205749512 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-92", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.3693962097167969, |
| "scores": { |
| "A": -12.054543495178223, |
| "B": -12.42393970489502, |
| "C": -12.770564079284668, |
| "D": -14.098543167114258, |
| "E": -14.959080696105957 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.9901371002197266, |
| "scores": { |
| "A": -11.04039192199707, |
| "B": -15.030529022216797, |
| "C": -15.173776626586914, |
| "D": -13.621156692504883, |
| "E": -17.786663055419922 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-93", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.8483829498291016, |
| "scores": { |
| "A": -13.06054401397705, |
| "B": -9.098247528076172, |
| "C": -9.98631477355957, |
| "D": -10.790071487426758, |
| "E": -10.946630477905273 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.748597145080566, |
| "scores": { |
| "A": -8.24567699432373, |
| "B": -12.128637313842773, |
| "C": -15.724483489990234, |
| "D": -13.707090377807617, |
| "E": -14.994274139404297 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-94", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -1.9227781295776367, |
| "scores": { |
| "A": -12.715982437133789, |
| "B": -10.660414695739746, |
| "C": -10.040125846862793, |
| "D": -13.691793441772461, |
| "E": -11.96290397644043 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.577579975128174, |
| "scores": { |
| "A": -5.7996954917907715, |
| "B": -6.2772345542907715, |
| "C": -9.271978378295898, |
| "D": -10.277997970581055, |
| "E": -11.377275466918945 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-95", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -3.9412012100219727, |
| "scores": { |
| "A": -11.20505428314209, |
| "B": -9.095925331115723, |
| "C": -8.017539978027344, |
| "D": -12.286678314208984, |
| "E": -11.958741188049316 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.4480342864990234, |
| "scores": { |
| "A": -8.222542762756348, |
| "B": -9.621024131774902, |
| "C": -8.630133628845215, |
| "D": -9.554940223693848, |
| "E": -9.670577049255371 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-96", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -4.339990615844727, |
| "scores": { |
| "A": -9.821067810058594, |
| "B": -9.126599311828613, |
| "C": -13.028761863708496, |
| "D": -11.429372787475586, |
| "E": -13.46658992767334 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.383525371551514, |
| "scores": { |
| "A": -7.286087512969971, |
| "B": -8.687753677368164, |
| "C": -13.005938529968262, |
| "D": -10.64708423614502, |
| "E": -11.669612884521484 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-97", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.8383378982543945, |
| "scores": { |
| "A": -14.469042778015137, |
| "B": -12.092732429504395, |
| "C": -13.835532188415527, |
| "D": -14.931070327758789, |
| "E": -13.803962707519531 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.630073547363281, |
| "scores": { |
| "A": -10.096877098083496, |
| "B": -10.369461059570312, |
| "C": -13.923606872558594, |
| "D": -14.726950645446777, |
| "E": -14.135393142700195 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-98", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.004549980163574, |
| "scores": { |
| "A": -13.00085735321045, |
| "B": -10.797918319702148, |
| "C": -10.5806884765625, |
| "D": -12.585238456726074, |
| "E": -11.885275840759277 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.416612148284912, |
| "scores": { |
| "A": -4.055731296539307, |
| "B": -8.211648941040039, |
| "C": -10.580713272094727, |
| "D": -10.472343444824219, |
| "E": -12.015127182006836 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-99", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.7006492614746094, |
| "scores": { |
| "A": -10.885625839233398, |
| "B": -9.20606803894043, |
| "C": -9.620462417602539, |
| "D": -11.3240966796875, |
| "E": -10.906717300415039 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.5970916748046875, |
| "scores": { |
| "A": -7.159132957458496, |
| "B": -8.3507661819458, |
| "C": -12.776918411254883, |
| "D": -12.628029823303223, |
| "E": -14.756224632263184 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-100", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.1513805389404297, |
| "scores": { |
| "A": -9.272323608398438, |
| "B": -9.739631652832031, |
| "C": -9.120943069458008, |
| "D": -10.063505172729492, |
| "E": -10.608749389648438 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.166820049285889, |
| "scores": { |
| "A": -4.778280735015869, |
| "B": -9.417329788208008, |
| "C": -10.945100784301758, |
| "D": -11.501747131347656, |
| "E": -13.226821899414062 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-101", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.3324604034423828, |
| "scores": { |
| "A": -10.100502014160156, |
| "B": -10.432962417602539, |
| "C": -11.973075866699219, |
| "D": -10.604475021362305, |
| "E": -12.458782196044922 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 4.506379127502441, |
| "scores": { |
| "A": -7.595697402954102, |
| "B": -12.102076530456543, |
| "C": -13.821539878845215, |
| "D": -12.81662654876709, |
| "E": -14.543049812316895 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-102", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -4.450368881225586, |
| "scores": { |
| "A": -11.315129280090332, |
| "B": -10.445816993713379, |
| "C": -10.68630599975586, |
| "D": -14.12060832977295, |
| "E": -14.896185874938965 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.9963579177856445, |
| "scores": { |
| "A": -6.833308219909668, |
| "B": -10.518851280212402, |
| "C": -13.344768524169922, |
| "D": -13.985496520996094, |
| "E": -14.829666137695312 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-103", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1493282318115234, |
| "scores": { |
| "A": -9.748441696166992, |
| "B": -8.529296875, |
| "C": -9.693557739257812, |
| "D": -11.449222564697266, |
| "E": -9.678625106811523 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.7451763153076172, |
| "scores": { |
| "A": -7.868520736694336, |
| "B": -8.613697052001953, |
| "C": -10.544960975646973, |
| "D": -9.806873321533203, |
| "E": -8.439764022827148 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-104", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -3.3600540161132812, |
| "scores": { |
| "A": -14.189347267150879, |
| "B": -11.361625671386719, |
| "C": -11.152084350585938, |
| "D": -14.512138366699219, |
| "E": -15.981123924255371 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.8950366973876953, |
| "scores": { |
| "A": -8.590035438537598, |
| "B": -10.608055114746094, |
| "C": -11.210797309875488, |
| "D": -11.485072135925293, |
| "E": -10.96902847290039 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-105", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 3.0392799377441406, |
| "scores": { |
| "A": -11.515534400939941, |
| "B": -12.032148361206055, |
| "C": -8.4762544631958, |
| "D": -13.967401504516602, |
| "E": -13.267354011535645 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.133573055267334, |
| "scores": { |
| "A": -7.463276386260986, |
| "B": -9.80911636352539, |
| "C": -8.59684944152832, |
| "D": -13.382390975952148, |
| "E": -13.58960247039795 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-106", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.9239311218261719, |
| "scores": { |
| "A": -10.124608039855957, |
| "B": -9.200676918029785, |
| "C": -9.467672348022461, |
| "D": -13.042096138000488, |
| "E": -13.135705947875977 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 3.8698911666870117, |
| "scores": { |
| "A": -7.598464012145996, |
| "B": -11.948047637939453, |
| "C": -13.422207832336426, |
| "D": -11.468355178833008, |
| "E": -14.042037010192871 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-107", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.0206260681152344, |
| "scores": { |
| "A": -9.933467864990234, |
| "B": -8.912841796875, |
| "C": -11.042007446289062, |
| "D": -12.203380584716797, |
| "E": -10.170745849609375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 3.180150032043457, |
| "scores": { |
| "A": -7.0703229904174805, |
| "B": -10.250473022460938, |
| "C": -11.684900283813477, |
| "D": -10.878337860107422, |
| "E": -11.952753067016602 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-108", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.651386260986328, |
| "scores": { |
| "A": -12.283191680908203, |
| "B": -9.631805419921875, |
| "C": -12.84640121459961, |
| "D": -13.274940490722656, |
| "E": -14.580394744873047 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.882678508758545, |
| "scores": { |
| "A": -6.49999475479126, |
| "B": -8.382673263549805, |
| "C": -11.06556510925293, |
| "D": -10.006368637084961, |
| "E": -11.96578598022461 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-109", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -0.8047208786010742, |
| "scores": { |
| "A": -10.049991607666016, |
| "B": -11.320069313049316, |
| "C": -10.77479362487793, |
| "D": -12.454825401306152, |
| "E": -9.970072746276855 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.179561614990234, |
| "scores": { |
| "A": -8.94586181640625, |
| "B": -13.091926574707031, |
| "C": -15.125423431396484, |
| "D": -13.694250106811523, |
| "E": -14.551794052124023 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-110", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.271200180053711, |
| "scores": { |
| "A": -12.053251266479492, |
| "B": -8.445196151733398, |
| "C": -11.71639633178711, |
| "D": -12.579341888427734, |
| "E": -15.129302024841309 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.586641788482666, |
| "scores": { |
| "A": -5.685309886932373, |
| "B": -9.068038940429688, |
| "C": -11.271951675415039, |
| "D": -11.380401611328125, |
| "E": -13.35078239440918 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-111", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.18841552734375, |
| "scores": { |
| "A": -9.808207511901855, |
| "B": -9.283623695373535, |
| "C": -9.472039222717285, |
| "D": -10.7572660446167, |
| "E": -11.43770980834961 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.6287879943847656, |
| "scores": { |
| "A": -8.439443588256836, |
| "B": -12.068231582641602, |
| "C": -12.49129867553711, |
| "D": -13.331933975219727, |
| "E": -14.553701400756836 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-112", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -5.095149040222168, |
| "scores": { |
| "A": -16.67582893371582, |
| "B": -16.2126522064209, |
| "C": -11.580679893493652, |
| "D": -13.807619094848633, |
| "E": -15.536310195922852 |
| } |
| }, |
| "ablated": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.09568214416503906, |
| "scores": { |
| "A": -10.995382308959961, |
| "B": -13.306709289550781, |
| "C": -11.021820068359375, |
| "D": -10.899700164794922, |
| "E": -15.755931854248047 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-113", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.3748960494995117, |
| "scores": { |
| "A": -10.411832809448242, |
| "B": -9.715240478515625, |
| "C": -10.090136528015137, |
| "D": -12.844676971435547, |
| "E": -11.264602661132812 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.242756366729736, |
| "scores": { |
| "A": -5.61425256729126, |
| "B": -10.998117446899414, |
| "C": -12.857008934020996, |
| "D": -11.544221878051758, |
| "E": -13.189793586730957 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-114", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -0.20656394958496094, |
| "scores": { |
| "A": -12.522754669189453, |
| "B": -11.953495025634766, |
| "C": -12.375024795532227, |
| "D": -12.647726058959961, |
| "E": -11.746931076049805 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.894749641418457, |
| "scores": { |
| "A": -5.269627571105957, |
| "B": -10.164377212524414, |
| "C": -12.320079803466797, |
| "D": -10.714139938354492, |
| "E": -12.523801803588867 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-115", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.2912511825561523, |
| "scores": { |
| "A": -10.948766708374023, |
| "B": -11.498233795166016, |
| "C": -11.3041410446167, |
| "D": -12.34277629852295, |
| "E": -12.240017890930176 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.3497333526611328, |
| "scores": { |
| "A": -9.884903907775879, |
| "B": -12.706127166748047, |
| "C": -13.208802223205566, |
| "D": -11.332338333129883, |
| "E": -11.234637260437012 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-116", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.7076244354248047, |
| "scores": { |
| "A": -12.037410736083984, |
| "B": -9.75387191772461, |
| "C": -11.461496353149414, |
| "D": -11.536352157592773, |
| "E": -11.817276000976562 |
| } |
| }, |
| "ablated": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.5289134979248047, |
| "scores": { |
| "A": -7.282122611999512, |
| "B": -7.493680000305176, |
| "C": -8.805983543395996, |
| "D": -6.964766502380371, |
| "E": -7.28157901763916 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-117", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.203188896179199, |
| "scores": { |
| "A": -11.712003707885742, |
| "B": -9.130005836486816, |
| "C": -9.083451271057129, |
| "D": -11.028267860412598, |
| "E": -11.286640167236328 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.0263042449951172, |
| "scores": { |
| "A": -6.421114444732666, |
| "B": -6.90539026260376, |
| "C": -7.702053546905518, |
| "D": -8.437675476074219, |
| "E": -7.447418689727783 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-118", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.34549522399902344, |
| "scores": { |
| "A": -12.712060928344727, |
| "B": -14.21017074584961, |
| "C": -13.05755615234375, |
| "D": -14.676868438720703, |
| "E": -13.82982063293457 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.252425193786621, |
| "scores": { |
| "A": -7.9384307861328125, |
| "B": -10.416237831115723, |
| "C": -10.190855979919434, |
| "D": -11.566178321838379, |
| "E": -11.377034187316895 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-119", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.894613265991211, |
| "scores": { |
| "A": -12.752466201782227, |
| "B": -11.295127868652344, |
| "C": -13.406665802001953, |
| "D": -13.189741134643555, |
| "E": -12.74017333984375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.093064308166504, |
| "scores": { |
| "A": -6.806607246398926, |
| "B": -7.607048988342285, |
| "C": -10.043014526367188, |
| "D": -10.89967155456543, |
| "E": -11.805773735046387 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-120", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.3595123291015625, |
| "scores": { |
| "A": -12.646347045898438, |
| "B": -10.183612823486328, |
| "C": -10.54312515258789, |
| "D": -11.979488372802734, |
| "E": -12.640970230102539 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.8485918045043945, |
| "scores": { |
| "A": -7.463525772094727, |
| "B": -12.312117576599121, |
| "C": -11.753535270690918, |
| "D": -12.008286476135254, |
| "E": -13.767097473144531 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-121", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.6828231811523438, |
| "scores": { |
| "A": -12.19267463684082, |
| "B": -10.822580337524414, |
| "C": -11.00235366821289, |
| "D": -13.505403518676758, |
| "E": -13.309852600097656 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.824639797210693, |
| "scores": { |
| "A": -7.172897815704346, |
| "B": -9.329679489135742, |
| "C": -10.56558609008789, |
| "D": -11.997537612915039, |
| "E": -11.65449333190918 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-122", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6598358154296875, |
| "scores": { |
| "A": -11.076019287109375, |
| "B": -10.416183471679688, |
| "C": -13.238750457763672, |
| "D": -13.289159774780273, |
| "E": -13.489381790161133 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.3248538970947266, |
| "scores": { |
| "A": -6.911991119384766, |
| "B": -9.236845016479492, |
| "C": -12.405698776245117, |
| "D": -10.99496078491211, |
| "E": -12.164006233215332 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-123", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.928288459777832, |
| "scores": { |
| "A": -12.817946434020996, |
| "B": -13.251622200012207, |
| "C": -10.08199405670166, |
| "D": -12.010282516479492, |
| "E": -12.828923225402832 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.036839485168457, |
| "scores": { |
| "A": -7.727773666381836, |
| "B": -10.925074577331543, |
| "C": -11.764613151550293, |
| "D": -11.528144836425781, |
| "E": -13.928091049194336 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-124", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.9609298706054688, |
| "scores": { |
| "A": -10.947786331176758, |
| "B": -9.929666519165039, |
| "C": -10.890596389770508, |
| "D": -11.183786392211914, |
| "E": -11.429544448852539 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.408071517944336, |
| "scores": { |
| "A": -9.135915756225586, |
| "B": -11.747968673706055, |
| "C": -11.543987274169922, |
| "D": -11.629928588867188, |
| "E": -11.322809219360352 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-125", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.26287078857421875, |
| "scores": { |
| "A": -12.876455307006836, |
| "B": -12.006429672241211, |
| "C": -10.34354305267334, |
| "D": -10.606413841247559, |
| "E": -11.505398750305176 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.1885986328125, |
| "scores": { |
| "A": -6.909121513366699, |
| "B": -8.705928802490234, |
| "C": -10.0977201461792, |
| "D": -9.862305641174316, |
| "E": -10.177146911621094 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-126", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.5388050079345703, |
| "scores": { |
| "A": -11.017732620239258, |
| "B": -8.353882789611816, |
| "C": -8.892687797546387, |
| "D": -9.955700874328613, |
| "E": -10.49584674835205 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.850264549255371, |
| "scores": { |
| "A": -5.791948318481445, |
| "B": -10.630058288574219, |
| "C": -11.642212867736816, |
| "D": -11.66257381439209, |
| "E": -12.981222152709961 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-127", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.07229804992675781, |
| "scores": { |
| "A": -10.561637878417969, |
| "B": -10.769161224365234, |
| "C": -10.85409164428711, |
| "D": -10.489339828491211, |
| "E": -11.068832397460938 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.2087717056274414, |
| "scores": { |
| "A": -6.518125534057617, |
| "B": -8.725174903869629, |
| "C": -9.385905265808105, |
| "D": -7.726897239685059, |
| "E": -8.385092735290527 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-128", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.00350284576416, |
| "scores": { |
| "A": -10.787993431091309, |
| "B": -10.3438081741333, |
| "C": -10.841973304748535, |
| "D": -13.455949783325195, |
| "E": -12.347311019897461 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.958767890930176, |
| "scores": { |
| "A": -5.449042320251465, |
| "B": -7.945782661437988, |
| "C": -9.260028839111328, |
| "D": -11.206633567810059, |
| "E": -10.40781021118164 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-129", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.607754707336426, |
| "scores": { |
| "A": -8.333622932434082, |
| "B": -10.68683910369873, |
| "C": -9.665505409240723, |
| "D": -10.941377639770508, |
| "E": -9.643619537353516 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.0246152877807617, |
| "scores": { |
| "A": -6.0440778732299805, |
| "B": -8.230778694152832, |
| "C": -7.669195175170898, |
| "D": -8.068693161010742, |
| "E": -8.261618614196777 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-130", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.5736770629882812, |
| "scores": { |
| "A": -12.52768611907959, |
| "B": -11.624752044677734, |
| "C": -14.400633811950684, |
| "D": -11.051074981689453, |
| "E": -12.196588516235352 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.6399145126342773, |
| "scores": { |
| "A": -7.714714050292969, |
| "B": -7.738489151000977, |
| "C": -10.441914558410645, |
| "D": -8.354628562927246, |
| "E": -8.231303215026855 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-131", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.308675765991211, |
| "scores": { |
| "A": -14.739927291870117, |
| "B": -9.621098518371582, |
| "C": -12.929774284362793, |
| "D": -13.892219543457031, |
| "E": -14.483654022216797 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7574863433837891, |
| "scores": { |
| "A": -9.305915832519531, |
| "B": -8.548429489135742, |
| "C": -11.97828483581543, |
| "D": -11.67667007446289, |
| "E": -12.261627197265625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-132", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.159423828125, |
| "scores": { |
| "A": -12.748001098632812, |
| "B": -9.872476577758789, |
| "C": -11.450910568237305, |
| "D": -11.085186004638672, |
| "E": -13.031900405883789 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.519545555114746, |
| "scores": { |
| "A": -9.1354398727417, |
| "B": -9.123946189880371, |
| "C": -10.554903030395508, |
| "D": -10.345840454101562, |
| "E": -10.643491744995117 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-133", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -4.092678070068359, |
| "scores": { |
| "A": -14.868802070617676, |
| "B": -16.15050506591797, |
| "C": -10.776124000549316, |
| "D": -14.600775718688965, |
| "E": -13.98430061340332 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 3.853121757507324, |
| "scores": { |
| "A": -8.56944465637207, |
| "B": -13.000343322753906, |
| "C": -12.422566413879395, |
| "D": -14.54437255859375, |
| "E": -14.450194358825684 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-134", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.462491989135742, |
| "scores": { |
| "A": -10.72162914276123, |
| "B": -11.912126541137695, |
| "C": -13.152649879455566, |
| "D": -13.184121131896973, |
| "E": -13.222978591918945 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.007552146911621, |
| "scores": { |
| "A": -7.717267990112305, |
| "B": -10.804107666015625, |
| "C": -11.367389678955078, |
| "D": -12.724820137023926, |
| "E": -14.619938850402832 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-135", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.5100650787353516, |
| "scores": { |
| "A": -13.62440299987793, |
| "B": -10.135202407836914, |
| "C": -10.906095504760742, |
| "D": -13.645267486572266, |
| "E": -13.970268249511719 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.0002031326293945, |
| "scores": { |
| "A": -7.204651832580566, |
| "B": -8.548638343811035, |
| "C": -9.101480484008789, |
| "D": -9.204854965209961, |
| "E": -10.522067070007324 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-136", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.6944398880004883, |
| "scores": { |
| "A": -12.556368827819824, |
| "B": -11.861928939819336, |
| "C": -11.924398422241211, |
| "D": -14.289663314819336, |
| "E": -14.393033981323242 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.863565444946289, |
| "scores": { |
| "A": -10.153200149536133, |
| "B": -12.455869674682617, |
| "C": -12.468259811401367, |
| "D": -12.016765594482422, |
| "E": -13.061859130859375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-137", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.4276561737060547, |
| "scores": { |
| "A": -10.441576957702637, |
| "B": -10.515706062316895, |
| "C": -10.08804988861084, |
| "D": -12.436307907104492, |
| "E": -12.847251892089844 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.2915706634521484, |
| "scores": { |
| "A": -9.814286231994629, |
| "B": -13.105856895446777, |
| "C": -11.835171699523926, |
| "D": -12.84805965423584, |
| "E": -13.396150588989258 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-138", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.1457071304321289, |
| "scores": { |
| "A": -11.518792152404785, |
| "B": -11.373085021972656, |
| "C": -13.849699974060059, |
| "D": -13.589049339294434, |
| "E": -12.325687408447266 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 2.37286376953125, |
| "scores": { |
| "A": -9.340991973876953, |
| "B": -11.942909240722656, |
| "C": -13.375253677368164, |
| "D": -12.332799911499023, |
| "E": -11.713855743408203 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-139", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -4.983943939208984, |
| "scores": { |
| "A": -11.229677200317383, |
| "B": -9.654775619506836, |
| "C": -11.174234390258789, |
| "D": -12.573564529418945, |
| "E": -14.63871955871582 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.07908821105957, |
| "scores": { |
| "A": -7.586383819580078, |
| "B": -9.389440536499023, |
| "C": -10.293685913085938, |
| "D": -9.784049987792969, |
| "E": -11.665472030639648 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-140", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.090902328491211, |
| "scores": { |
| "A": -12.17054557800293, |
| "B": -10.950679779052734, |
| "C": -12.478940963745117, |
| "D": -12.041582107543945, |
| "E": -12.825494766235352 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.0806522369384766, |
| "scores": { |
| "A": -8.631109237670898, |
| "B": -9.711761474609375, |
| "C": -10.810302734375, |
| "D": -10.214776992797852, |
| "E": -11.603350639343262 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-141", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.3972196578979492, |
| "scores": { |
| "A": -15.668845176696777, |
| "B": -14.022212028503418, |
| "C": -12.345376968383789, |
| "D": -12.742596626281738, |
| "E": -13.434144973754883 |
| } |
| }, |
| "ablated": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -1.4827747344970703, |
| "scores": { |
| "A": -9.032247543334961, |
| "B": -10.177014350891113, |
| "C": -9.580657005310059, |
| "D": -8.116410255432129, |
| "E": -8.097882270812988 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-142", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.03481101989746094, |
| "scores": { |
| "A": -12.051063537597656, |
| "B": -12.085874557495117, |
| "C": -12.250658988952637, |
| "D": -13.729873657226562, |
| "E": -13.645383834838867 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 2.3378829956054688, |
| "scores": { |
| "A": -10.318166732788086, |
| "B": -13.508720397949219, |
| "C": -12.656049728393555, |
| "D": -13.118291854858398, |
| "E": -14.187196731567383 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-143", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.2146177291870117, |
| "scores": { |
| "A": -10.845283508300781, |
| "B": -9.34599781036377, |
| "C": -10.440536499023438, |
| "D": -10.560615539550781, |
| "E": -10.313671112060547 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.8671979904174805, |
| "scores": { |
| "A": -8.795687675476074, |
| "B": -11.772283554077148, |
| "C": -12.032180786132812, |
| "D": -10.662885665893555, |
| "E": -11.071569442749023 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-144", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.5195407867431641, |
| "scores": { |
| "A": -14.143760681152344, |
| "B": -13.62421989440918, |
| "C": -16.54352569580078, |
| "D": -16.72017478942871, |
| "E": -16.012075424194336 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 7.688299179077148, |
| "scores": { |
| "A": -6.442632675170898, |
| "B": -14.130931854248047, |
| "C": -17.20372772216797, |
| "D": -14.199527740478516, |
| "E": -15.404621124267578 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-145", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -0.12565994262695312, |
| "scores": { |
| "A": -13.606302261352539, |
| "B": -11.843841552734375, |
| "C": -14.27863883972168, |
| "D": -13.242870330810547, |
| "E": -11.718181610107422 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.291046142578125, |
| "scores": { |
| "A": -8.400792121887207, |
| "B": -11.691838264465332, |
| "C": -15.319357872009277, |
| "D": -13.33833122253418, |
| "E": -12.860288619995117 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-146", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -1.6924715042114258, |
| "scores": { |
| "A": -9.780816078186035, |
| "B": -11.162944793701172, |
| "C": -9.470473289489746, |
| "D": -10.71984577178955, |
| "E": -9.969797134399414 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.404109001159668, |
| "scores": { |
| "A": -8.36557388305664, |
| "B": -10.769682884216309, |
| "C": -14.039958000183105, |
| "D": -13.555811882019043, |
| "E": -13.034090042114258 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-147", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.14206600189208984, |
| "scores": { |
| "A": -9.889412879943848, |
| "B": -10.670077323913574, |
| "C": -11.043986320495605, |
| "D": -12.524433135986328, |
| "E": -10.031478881835938 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.204798698425293, |
| "scores": { |
| "A": -6.384355545043945, |
| "B": -8.133864402770996, |
| "C": -9.067092895507812, |
| "D": -10.208111763000488, |
| "E": -8.589154243469238 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-148", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.09283638000488281, |
| "scores": { |
| "A": -11.842838287353516, |
| "B": -8.686580657958984, |
| "C": -9.391075134277344, |
| "D": -8.593744277954102, |
| "E": -10.327585220336914 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.2917442321777344, |
| "scores": { |
| "A": -8.228094100952148, |
| "B": -9.228675842285156, |
| "C": -9.42142105102539, |
| "D": -8.519838333129883, |
| "E": -9.596782684326172 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-149", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.9827327728271484, |
| "scores": { |
| "A": -12.001296997070312, |
| "B": -10.335747718811035, |
| "C": -11.285538673400879, |
| "D": -11.318480491638184, |
| "E": -11.636820793151855 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.491743564605713, |
| "scores": { |
| "A": -7.410029888153076, |
| "B": -10.250740051269531, |
| "C": -13.93216323852539, |
| "D": -13.901773452758789, |
| "E": -15.188919067382812 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-150", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.9258947372436523, |
| "scores": { |
| "A": -12.320900917053223, |
| "B": -9.475645065307617, |
| "C": -10.984822273254395, |
| "D": -12.40153980255127, |
| "E": -12.635085105895996 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.376846790313721, |
| "scores": { |
| "A": -7.0111308097839355, |
| "B": -12.037229537963867, |
| "C": -12.431285858154297, |
| "D": -12.387977600097656, |
| "E": -13.794790267944336 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-151", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -2.647052764892578, |
| "scores": { |
| "A": -15.152583122253418, |
| "B": -13.6299467086792, |
| "C": -14.575118064880371, |
| "D": -15.285728454589844, |
| "E": -11.928065299987793 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.1320362091064453, |
| "scores": { |
| "A": -10.271482467651367, |
| "B": -11.43825912475586, |
| "C": -13.403518676757812, |
| "D": -11.501873016357422, |
| "E": -12.473245620727539 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-152", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6472129821777344, |
| "scores": { |
| "A": -12.141305923461914, |
| "B": -11.08128833770752, |
| "C": -11.728501319885254, |
| "D": -11.744885444641113, |
| "E": -11.734070777893066 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.606741905212402, |
| "scores": { |
| "A": -8.290619850158691, |
| "B": -12.897361755371094, |
| "C": -16.176721572875977, |
| "D": -13.130666732788086, |
| "E": -13.918773651123047 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-153", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.4722251892089844, |
| "scores": { |
| "A": -10.484762191772461, |
| "B": -10.956987380981445, |
| "C": -12.194547653198242, |
| "D": -15.127632141113281, |
| "E": -13.331162452697754 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 3.3751373291015625, |
| "scores": { |
| "A": -5.1184186935424805, |
| "B": -8.740020751953125, |
| "C": -8.493556022644043, |
| "D": -12.901175498962402, |
| "E": -12.068525314331055 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-154", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.98802375793457, |
| "scores": { |
| "A": -10.425346374511719, |
| "B": -11.881086349487305, |
| "C": -11.785425186157227, |
| "D": -15.413370132446289, |
| "E": -13.525296211242676 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.682590961456299, |
| "scores": { |
| "A": -6.576329708099365, |
| "B": -9.645816802978516, |
| "C": -10.998794555664062, |
| "D": -11.258920669555664, |
| "E": -11.089090347290039 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-155", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -3.2458534240722656, |
| "scores": { |
| "A": -11.75346565246582, |
| "B": -10.164717674255371, |
| "C": -8.507612228393555, |
| "D": -12.298287391662598, |
| "E": -12.295981407165527 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.698258399963379, |
| "scores": { |
| "A": -6.5401506423950195, |
| "B": -9.082632064819336, |
| "C": -8.238409042358398, |
| "D": -9.46942138671875, |
| "E": -8.837421417236328 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-156", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.5458030700683594, |
| "scores": { |
| "A": -11.058704376220703, |
| "B": -9.512901306152344, |
| "C": -10.548510551452637, |
| "D": -10.738350868225098, |
| "E": -10.144469261169434 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.5494518280029297, |
| "scores": { |
| "A": -8.577381134033203, |
| "B": -9.532999038696289, |
| "C": -10.54125690460205, |
| "D": -9.21225643157959, |
| "E": -9.126832962036133 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-157", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.2852640151977539, |
| "scores": { |
| "A": -10.463203430175781, |
| "B": -8.768805503845215, |
| "C": -9.054069519042969, |
| "D": -9.892763137817383, |
| "E": -10.059773445129395 |
| } |
| }, |
| "ablated": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -1.2878742218017578, |
| "scores": { |
| "A": -7.138072967529297, |
| "B": -8.380763053894043, |
| "C": -8.266191482543945, |
| "D": -7.350512504577637, |
| "E": -6.9783172607421875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-158", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.5956363677978516, |
| "scores": { |
| "A": -11.858783721923828, |
| "B": -9.868914604187012, |
| "C": -12.355928421020508, |
| "D": -12.464550971984863, |
| "E": -12.451033592224121 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.8353786468505859, |
| "scores": { |
| "A": -7.951072692871094, |
| "B": -10.968667030334473, |
| "C": -9.150461196899414, |
| "D": -8.78645133972168, |
| "E": -9.640289306640625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-159", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.2088556289672852, |
| "scores": { |
| "A": -14.390352249145508, |
| "B": -10.970995903015137, |
| "C": -12.179851531982422, |
| "D": -14.215005874633789, |
| "E": -13.121991157531738 |
| } |
| }, |
| "ablated": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -1.5493526458740234, |
| "scores": { |
| "A": -10.103426933288574, |
| "B": -10.092862129211426, |
| "C": -11.540170669555664, |
| "D": -9.99081802368164, |
| "E": -10.121420860290527 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-160", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.848499298095703, |
| "scores": { |
| "A": -11.598515510559082, |
| "B": -14.197969436645508, |
| "C": -14.457886695861816, |
| "D": -14.447014808654785, |
| "E": -14.36185073852539 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.696338653564453, |
| "scores": { |
| "A": -7.641029357910156, |
| "B": -11.26103687286377, |
| "C": -12.703668594360352, |
| "D": -12.33736801147461, |
| "E": -12.883567810058594 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-161", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.1676807403564453, |
| "scores": { |
| "A": -14.895027160644531, |
| "B": -10.322026252746582, |
| "C": -12.489706993103027, |
| "D": -12.704346656799316, |
| "E": -15.176275253295898 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.219618797302246, |
| "scores": { |
| "A": -6.856925964355469, |
| "B": -7.009190559387207, |
| "C": -9.076544761657715, |
| "D": -9.654449462890625, |
| "E": -9.462542533874512 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-162", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.2371959686279297, |
| "scores": { |
| "A": -10.148950576782227, |
| "B": -10.49891471862793, |
| "C": -12.386146545410156, |
| "D": -11.85212516784668, |
| "E": -11.997817993164062 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.578624725341797, |
| "scores": { |
| "A": -5.560625076293945, |
| "B": -10.614078521728516, |
| "C": -10.139249801635742, |
| "D": -10.14529800415039, |
| "E": -11.632416725158691 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-163", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.3345470428466797, |
| "scores": { |
| "A": -10.16016960144043, |
| "B": -10.63470458984375, |
| "C": -11.49471664428711, |
| "D": -14.061302185058594, |
| "E": -11.96687126159668 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.617689609527588, |
| "scores": { |
| "A": -7.696357250213623, |
| "B": -11.909870147705078, |
| "C": -12.314046859741211, |
| "D": -12.379276275634766, |
| "E": -10.673864364624023 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-164", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.0201797485351562, |
| "scores": { |
| "A": -11.76971435546875, |
| "B": -11.384875297546387, |
| "C": -12.587923049926758, |
| "D": -14.405055046081543, |
| "E": -13.626121520996094 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.4918289184570312, |
| "scores": { |
| "A": -10.035612106323242, |
| "B": -13.309289932250977, |
| "C": -14.361808776855469, |
| "D": -12.527441024780273, |
| "E": -13.649295806884766 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-165", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.3930606842041016, |
| "scores": { |
| "A": -13.838768005371094, |
| "B": -11.385400772094727, |
| "C": -14.192607879638672, |
| "D": -12.778461456298828, |
| "E": -14.867376327514648 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.0367560386657715, |
| "scores": { |
| "A": -7.127369403839111, |
| "B": -8.74629020690918, |
| "C": -13.14265251159668, |
| "D": -13.164125442504883, |
| "E": -14.882063865661621 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-166", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.7403507232666016, |
| "scores": { |
| "A": -13.24523639678955, |
| "B": -8.922253608703613, |
| "C": -9.740599632263184, |
| "D": -10.831602096557617, |
| "E": -11.662604331970215 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.432188510894775, |
| "scores": { |
| "A": -6.22821569442749, |
| "B": -8.795976638793945, |
| "C": -9.08587646484375, |
| "D": -9.576181411743164, |
| "E": -11.660404205322266 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-167", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 2.286722183227539, |
| "scores": { |
| "A": -13.319049835205078, |
| "B": -10.63465690612793, |
| "C": -12.921379089355469, |
| "D": -16.10821533203125, |
| "E": -14.74123764038086 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.3104257583618164, |
| "scores": { |
| "A": -8.05471420288086, |
| "B": -11.365139961242676, |
| "C": -15.134896278381348, |
| "D": -13.336740493774414, |
| "E": -14.394715309143066 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-168", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.5851516723632812, |
| "scores": { |
| "A": -11.949223518371582, |
| "B": -12.088781356811523, |
| "C": -12.600192070007324, |
| "D": -13.534375190734863, |
| "E": -13.724043846130371 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.811002731323242, |
| "scores": { |
| "A": -8.285033226013184, |
| "B": -12.87575912475586, |
| "C": -14.61474609375, |
| "D": -13.096035957336426, |
| "E": -14.469371795654297 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-169", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.259394645690918, |
| "scores": { |
| "A": -11.700346946716309, |
| "B": -9.44095230102539, |
| "C": -11.634363174438477, |
| "D": -13.524284362792969, |
| "E": -12.442931175231934 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.1425189971923828, |
| "scores": { |
| "A": -9.171747207641602, |
| "B": -10.314266204833984, |
| "C": -10.48922348022461, |
| "D": -11.292402267456055, |
| "E": -11.195283889770508 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-170", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.667557716369629, |
| "scores": { |
| "A": -12.687468528747559, |
| "B": -13.540651321411133, |
| "C": -15.73199462890625, |
| "D": -14.172163009643555, |
| "E": -14.355026245117188 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.3893675804138184, |
| "scores": { |
| "A": -7.45543909072876, |
| "B": -11.622758865356445, |
| "C": -12.694786071777344, |
| "D": -10.333147048950195, |
| "E": -10.844806671142578 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-171", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.3269481658935547, |
| "scores": { |
| "A": -12.739081382751465, |
| "B": -11.41213321685791, |
| "C": -12.104532241821289, |
| "D": -14.289388656616211, |
| "E": -13.22745132446289 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 3.89689302444458, |
| "scores": { |
| "A": -7.288093090057373, |
| "B": -11.184986114501953, |
| "C": -14.11172103881836, |
| "D": -13.88416862487793, |
| "E": -14.152048110961914 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-172", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.552183151245117, |
| "scores": { |
| "A": -11.899900436401367, |
| "B": -9.34771728515625, |
| "C": -9.6818265914917, |
| "D": -12.592266082763672, |
| "E": -11.358457565307617 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.2487936019897461, |
| "scores": { |
| "A": -7.7580461502075195, |
| "B": -8.006839752197266, |
| "C": -8.965506553649902, |
| "D": -10.227289199829102, |
| "E": -8.758523941040039 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-173", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.587629318237305, |
| "scores": { |
| "A": -9.585987091064453, |
| "B": -13.276374816894531, |
| "C": -13.367696762084961, |
| "D": -14.173616409301758, |
| "E": -12.080738067626953 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.505217552185059, |
| "scores": { |
| "A": -7.101271629333496, |
| "B": -12.202089309692383, |
| "C": -14.173044204711914, |
| "D": -14.606489181518555, |
| "E": -14.75442123413086 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-174", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.24216461181640625, |
| "scores": { |
| "A": -14.199091911315918, |
| "B": -13.47407054901123, |
| "C": -14.678143501281738, |
| "D": -13.550527572631836, |
| "E": -13.716235160827637 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.9764862060546875, |
| "scores": { |
| "A": -8.514163970947266, |
| "B": -11.301080703735352, |
| "C": -14.558061599731445, |
| "D": -12.563972473144531, |
| "E": -14.490650177001953 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-175", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.3755264282226562, |
| "scores": { |
| "A": -12.588652610778809, |
| "B": -11.279211044311523, |
| "C": -12.724414825439453, |
| "D": -13.617168426513672, |
| "E": -12.65473747253418 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.200804233551025, |
| "scores": { |
| "A": -7.825118541717529, |
| "B": -12.34547233581543, |
| "C": -15.431029319763184, |
| "D": -11.632364273071289, |
| "E": -14.025922775268555 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-176", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.7927188873291016, |
| "scores": { |
| "A": -10.986503601074219, |
| "B": -10.264165878295898, |
| "C": -13.056884765625, |
| "D": -13.231691360473633, |
| "E": -11.577075958251953 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.047300338745117, |
| "scores": { |
| "A": -8.569661140441895, |
| "B": -10.96971607208252, |
| "C": -12.616961479187012, |
| "D": -10.226570129394531, |
| "E": -10.359309196472168 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-177", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.117347717285156, |
| "scores": { |
| "A": -9.619380950927734, |
| "B": -9.922872543334961, |
| "C": -13.73672866821289, |
| "D": -11.895669937133789, |
| "E": -11.525716781616211 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.858722686767578, |
| "scores": { |
| "A": -5.202523231506348, |
| "B": -7.391201972961426, |
| "C": -12.061245918273926, |
| "D": -9.575565338134766, |
| "E": -11.126143455505371 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-178", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.44650745391845703, |
| "scores": { |
| "A": -13.206219673156738, |
| "B": -11.094629287719727, |
| "C": -12.79085922241211, |
| "D": -12.61279582977295, |
| "E": -10.64812183380127 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.734641075134277, |
| "scores": { |
| "A": -5.792222023010254, |
| "B": -10.40644359588623, |
| "C": -8.512224197387695, |
| "D": -10.881692886352539, |
| "E": -12.526863098144531 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-179", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.587289810180664, |
| "scores": { |
| "A": -9.154937744140625, |
| "B": -10.742227554321289, |
| "C": -11.76572322845459, |
| "D": -12.661623001098633, |
| "E": -12.793743133544922 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.158479690551758, |
| "scores": { |
| "A": -5.91640567779541, |
| "B": -8.074885368347168, |
| "C": -10.336216926574707, |
| "D": -10.604473114013672, |
| "E": -12.273855209350586 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-180", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.503520965576172, |
| "scores": { |
| "A": -11.160909652709961, |
| "B": -9.851707458496094, |
| "C": -12.355228424072266, |
| "D": -13.170286178588867, |
| "E": -12.383331298828125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.526637077331543, |
| "scores": { |
| "A": -10.631556510925293, |
| "B": -11.933387756347656, |
| "C": -12.158193588256836, |
| "D": -11.02450942993164, |
| "E": -11.620341300964355 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-181", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7682161331176758, |
| "scores": { |
| "A": -9.629287719726562, |
| "B": -8.861071586608887, |
| "C": -11.832342147827148, |
| "D": -11.63463020324707, |
| "E": -10.680866241455078 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.5154037475585938, |
| "scores": { |
| "A": -7.565939903259277, |
| "B": -9.081343650817871, |
| "C": -10.455299377441406, |
| "D": -9.157304763793945, |
| "E": -9.032361030578613 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-182", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.821702003479004, |
| "scores": { |
| "A": -11.69933032989502, |
| "B": -15.636759757995605, |
| "C": -15.521032333374023, |
| "D": -15.261280059814453, |
| "E": -15.790119171142578 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.4775166511535645, |
| "scores": { |
| "A": -7.3024001121521, |
| "B": -11.616369247436523, |
| "C": -13.779916763305664, |
| "D": -14.841501235961914, |
| "E": -15.299184799194336 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-183", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.4096593856811523, |
| "scores": { |
| "A": -12.662055969238281, |
| "B": -9.372528076171875, |
| "C": -10.782187461853027, |
| "D": -13.160992622375488, |
| "E": -13.141705513000488 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.7217111587524414, |
| "scores": { |
| "A": -8.667959213256836, |
| "B": -7.500253200531006, |
| "C": -6.7785420417785645, |
| "D": -9.29892349243164, |
| "E": -10.76202392578125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-184", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.1710405349731445, |
| "scores": { |
| "A": -11.335744857788086, |
| "B": -9.919331550598145, |
| "C": -11.165321350097656, |
| "D": -13.090372085571289, |
| "E": -12.288164138793945 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.067633628845215, |
| "scores": { |
| "A": -8.7251615524292, |
| "B": -11.410130500793457, |
| "C": -11.521978378295898, |
| "D": -10.792795181274414, |
| "E": -11.264982223510742 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-185", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.1894311904907227, |
| "scores": { |
| "A": -11.377983093261719, |
| "B": -9.404431343078613, |
| "C": -12.593862533569336, |
| "D": -12.444841384887695, |
| "E": -12.661911964416504 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -8.37000560760498, |
| "scores": { |
| "A": -5.967945098876953, |
| "B": -12.820409774780273, |
| "C": -14.337950706481934, |
| "D": -13.287762641906738, |
| "E": -14.76830005645752 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-186", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.3980731964111328, |
| "scores": { |
| "A": -11.276969909667969, |
| "B": -11.675043106079102, |
| "C": -14.097780227661133, |
| "D": -14.689929962158203, |
| "E": -13.63922119140625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 2.4271883964538574, |
| "scores": { |
| "A": -7.584385395050049, |
| "B": -10.011573791503906, |
| "C": -13.923393249511719, |
| "D": -12.747108459472656, |
| "E": -12.790593147277832 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-187", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.0718564987182617, |
| "scores": { |
| "A": -12.528532028198242, |
| "B": -10.982802391052246, |
| "C": -13.054658889770508, |
| "D": -14.590221405029297, |
| "E": -14.210501670837402 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.2736144065856934, |
| "scores": { |
| "A": -7.474527835845947, |
| "B": -9.265643119812012, |
| "C": -10.74814224243164, |
| "D": -11.135126113891602, |
| "E": -11.180526733398438 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-188", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.121814727783203, |
| "scores": { |
| "A": -12.144158363342285, |
| "B": -13.037331581115723, |
| "C": -13.73487377166748, |
| "D": -14.09709358215332, |
| "E": -15.265973091125488 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.235786437988281, |
| "scores": { |
| "A": -11.129180908203125, |
| "B": -13.703113555908203, |
| "C": -16.397157669067383, |
| "D": -12.766101837158203, |
| "E": -16.364967346191406 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-189", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.20896244049072266, |
| "scores": { |
| "A": -14.54034423828125, |
| "B": -11.554760932922363, |
| "C": -11.811978340148926, |
| "D": -11.763723373413086, |
| "E": -13.348597526550293 |
| } |
| }, |
| "ablated": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.8642768859863281, |
| "scores": { |
| "A": -9.882810592651367, |
| "B": -10.419057846069336, |
| "C": -10.307378768920898, |
| "D": -9.554780960083008, |
| "E": -9.593378067016602 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-190", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.35360145568847656, |
| "scores": { |
| "A": -13.596860885620117, |
| "B": -10.771349906921387, |
| "C": -10.41774845123291, |
| "D": -13.349145889282227, |
| "E": -13.912391662597656 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.489037036895752, |
| "scores": { |
| "A": -6.3273138999938965, |
| "B": -9.247300148010254, |
| "C": -12.816350936889648, |
| "D": -10.787364959716797, |
| "E": -12.917289733886719 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-191", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.9574899673461914, |
| "scores": { |
| "A": -11.895600318908691, |
| "B": -10.9381103515625, |
| "C": -13.633337020874023, |
| "D": -14.099964141845703, |
| "E": -13.749225616455078 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.093125343322754, |
| "scores": { |
| "A": -6.434209823608398, |
| "B": -8.527335166931152, |
| "C": -11.775838851928711, |
| "D": -11.290367126464844, |
| "E": -12.324054718017578 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-192", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.11513137817382812, |
| "scores": { |
| "A": -13.689857482910156, |
| "B": -13.662582397460938, |
| "C": -13.574726104736328, |
| "D": -14.454401016235352, |
| "E": -13.602828979492188 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.6858510971069336, |
| "scores": { |
| "A": -8.88024616241455, |
| "B": -12.6011962890625, |
| "C": -13.179601669311523, |
| "D": -11.137103080749512, |
| "E": -10.566097259521484 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-193", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.7755870819091797, |
| "scores": { |
| "A": -11.522911071777344, |
| "B": -8.747323989868164, |
| "C": -9.848695755004883, |
| "D": -10.299760818481445, |
| "E": -10.045204162597656 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.2680301666259766, |
| "scores": { |
| "A": -8.59853458404541, |
| "B": -10.327086448669434, |
| "C": -10.31645679473877, |
| "D": -9.866564750671387, |
| "E": -9.983407974243164 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-194", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.1696929931640625, |
| "scores": { |
| "A": -9.464816093444824, |
| "B": -9.411468505859375, |
| "C": -10.639501571655273, |
| "D": -11.581161499023438, |
| "E": -12.365375518798828 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.69554328918457, |
| "scores": { |
| "A": -7.166990280151367, |
| "B": -12.7379150390625, |
| "C": -14.002235412597656, |
| "D": -13.862533569335938, |
| "E": -15.497852325439453 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-195", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -4.067173004150391, |
| "scores": { |
| "A": -11.221086502075195, |
| "B": -8.251949310302734, |
| "C": -10.201787948608398, |
| "D": -12.319122314453125, |
| "E": -12.611976623535156 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.214510917663574, |
| "scores": { |
| "A": -8.04576301574707, |
| "B": -9.183307647705078, |
| "C": -9.281808853149414, |
| "D": -10.260273933410645, |
| "E": -10.553353309631348 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-196", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.8260202407836914, |
| "scores": { |
| "A": -12.807720184326172, |
| "B": -10.98169994354248, |
| "C": -11.949183464050293, |
| "D": -12.729838371276855, |
| "E": -14.811100006103516 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.4137496948242188, |
| "scores": { |
| "A": -9.012284278869629, |
| "B": -11.167215347290039, |
| "C": -11.748849868774414, |
| "D": -10.426033973693848, |
| "E": -12.733590126037598 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-197", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.6986474990844727, |
| "scores": { |
| "A": -10.992959022521973, |
| "B": -8.939371109008789, |
| "C": -9.638018608093262, |
| "D": -9.939753532409668, |
| "E": -10.469696998596191 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.4545230865478516, |
| "scores": { |
| "A": -8.536977767944336, |
| "B": -10.056846618652344, |
| "C": -11.991500854492188, |
| "D": -8.791147232055664, |
| "E": -9.266132354736328 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-198", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.42527008056640625, |
| "scores": { |
| "A": -11.604852676391602, |
| "B": -9.509223937988281, |
| "C": -9.934494018554688, |
| "D": -13.406452178955078, |
| "E": -11.831525802612305 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.08353328704833984, |
| "scores": { |
| "A": -9.30318546295166, |
| "B": -11.121411323547363, |
| "C": -9.38671875, |
| "D": -11.00162410736084, |
| "E": -11.414689064025879 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-199", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.215788841247559, |
| "scores": { |
| "A": -10.268373489379883, |
| "B": -14.280426025390625, |
| "C": -15.484162330627441, |
| "D": -16.044178009033203, |
| "E": -13.105344772338867 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.14111328125, |
| "scores": { |
| "A": -9.658950805664062, |
| "B": -12.755328178405762, |
| "C": -14.800064086914062, |
| "D": -14.560892105102539, |
| "E": -15.274332046508789 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-200", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -0.2883167266845703, |
| "scores": { |
| "A": -12.25977897644043, |
| "B": -11.99930477142334, |
| "C": -13.633522033691406, |
| "D": -12.022893905639648, |
| "E": -11.71098804473877 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.265233039855957, |
| "scores": { |
| "A": -7.928126335144043, |
| "B": -11.193359375, |
| "C": -13.55146598815918, |
| "D": -9.998331069946289, |
| "E": -9.261889457702637 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-201", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.5516147613525391, |
| "scores": { |
| "A": -10.827482223510742, |
| "B": -10.275867462158203, |
| "C": -10.488014221191406, |
| "D": -11.649810791015625, |
| "E": -12.461782455444336 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.869196891784668, |
| "scores": { |
| "A": -9.49152660369873, |
| "B": -11.884713172912598, |
| "C": -13.91677188873291, |
| "D": -11.360723495483398, |
| "E": -14.611146926879883 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-202", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -1.6743907928466797, |
| "scores": { |
| "A": -11.541190147399902, |
| "B": -11.870600700378418, |
| "C": -10.76689338684082, |
| "D": -11.914441108703613, |
| "E": -10.196209907531738 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.4919681549072266, |
| "scores": { |
| "A": -6.873357772827148, |
| "B": -10.365325927734375, |
| "C": -13.835872650146484, |
| "D": -13.012420654296875, |
| "E": -13.628089904785156 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-203", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.915132522583008, |
| "scores": { |
| "A": -9.924501419067383, |
| "B": -9.837858200073242, |
| "C": -8.515426635742188, |
| "D": -11.430559158325195, |
| "E": -10.028295516967773 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.9277362823486328, |
| "scores": { |
| "A": -8.02107048034668, |
| "B": -9.770700454711914, |
| "C": -10.0626220703125, |
| "D": -8.948806762695312, |
| "E": -10.232612609863281 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-204", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.054775238037109375, |
| "scores": { |
| "A": -9.659589767456055, |
| "B": -9.96756362915039, |
| "C": -9.714365005493164, |
| "D": -11.157163619995117, |
| "E": -10.775384902954102 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.617983341217041, |
| "scores": { |
| "A": -6.012364864349365, |
| "B": -10.016752243041992, |
| "C": -10.630348205566406, |
| "D": -11.478163719177246, |
| "E": -11.222594261169434 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-205", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.753251075744629, |
| "scores": { |
| "A": -9.721230506896973, |
| "B": -8.277044296264648, |
| "C": -10.387093544006348, |
| "D": -11.78427791595459, |
| "E": -11.030295372009277 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.736443042755127, |
| "scores": { |
| "A": -7.844253063201904, |
| "B": -9.998329162597656, |
| "C": -11.479426383972168, |
| "D": -11.290699005126953, |
| "E": -12.580696105957031 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-206", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.18168067932128906, |
| "scores": { |
| "A": -11.602930068969727, |
| "B": -10.961795806884766, |
| "C": -11.143476486206055, |
| "D": -12.837438583374023, |
| "E": -14.00632095336914 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.0302047729492188, |
| "scores": { |
| "A": -8.013933181762695, |
| "B": -11.044137954711914, |
| "C": -12.337331771850586, |
| "D": -11.77204704284668, |
| "E": -14.158761024475098 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-207", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -3.332465171813965, |
| "scores": { |
| "A": -12.935659408569336, |
| "B": -13.335750579833984, |
| "C": -10.853610038757324, |
| "D": -15.803115844726562, |
| "E": -14.186075210571289 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -8.302919387817383, |
| "scores": { |
| "A": -6.643090724945068, |
| "B": -11.031190872192383, |
| "C": -12.203893661499023, |
| "D": -15.097414016723633, |
| "E": -14.94601058959961 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-208", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.8153915405273438, |
| "scores": { |
| "A": -12.07571029663086, |
| "B": -10.124650955200195, |
| "C": -11.729717254638672, |
| "D": -10.940042495727539, |
| "E": -13.055669784545898 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.7086524963378906, |
| "scores": { |
| "A": -9.813956260681152, |
| "B": -11.153253555297852, |
| "C": -13.102922439575195, |
| "D": -11.522608757019043, |
| "E": -12.09807300567627 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-209", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.9387226104736328, |
| "scores": { |
| "A": -10.875650405883789, |
| "B": -9.770784378051758, |
| "C": -13.167339324951172, |
| "D": -10.202999114990234, |
| "E": -8.832061767578125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.02905750274658203, |
| "scores": { |
| "A": -6.857519149780273, |
| "B": -8.824195861816406, |
| "C": -11.01872730255127, |
| "D": -7.9446258544921875, |
| "E": -6.828461647033691 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-210", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.5599374771118164, |
| "scores": { |
| "A": -10.820873260498047, |
| "B": -10.8345947265625, |
| "C": -10.908698081970215, |
| "D": -12.616942405700684, |
| "E": -11.380810737609863 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.7156219482421875, |
| "scores": { |
| "A": -8.604469299316406, |
| "B": -10.140972137451172, |
| "C": -9.98969841003418, |
| "D": -9.52783203125, |
| "E": -9.320091247558594 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-211", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.5010337829589844, |
| "scores": { |
| "A": -9.232921600341797, |
| "B": -8.809791564941406, |
| "C": -10.77252197265625, |
| "D": -11.31082534790039, |
| "E": -9.859048843383789 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.274179458618164, |
| "scores": { |
| "A": -6.612133979797363, |
| "B": -7.950355529785156, |
| "C": -10.069632530212402, |
| "D": -8.886313438415527, |
| "E": -8.989130020141602 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-212", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.515838623046875, |
| "scores": { |
| "A": -11.57960319519043, |
| "B": -9.604219436645508, |
| "C": -11.120058059692383, |
| "D": -11.739898681640625, |
| "E": -12.83167839050293 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.382899284362793, |
| "scores": { |
| "A": -4.981387138366699, |
| "B": -8.364286422729492, |
| "C": -11.265626907348633, |
| "D": -9.413225173950195, |
| "E": -11.893355369567871 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-213", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.7208938598632812, |
| "scores": { |
| "A": -13.409758567810059, |
| "B": -12.505935668945312, |
| "C": -15.226829528808594, |
| "D": -15.019231796264648, |
| "E": -13.97078800201416 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.587521553039551, |
| "scores": { |
| "A": -7.238009452819824, |
| "B": -11.375316619873047, |
| "C": -13.825531005859375, |
| "D": -12.193073272705078, |
| "E": -12.065244674682617 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-214", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -3.253298759460449, |
| "scores": { |
| "A": -10.732587814331055, |
| "B": -9.240824699401855, |
| "C": -8.385510444641113, |
| "D": -11.663347244262695, |
| "E": -11.638809204101562 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.647817611694336, |
| "scores": { |
| "A": -7.310015678405762, |
| "B": -10.763197898864746, |
| "C": -10.126215934753418, |
| "D": -9.662254333496094, |
| "E": -10.957833290100098 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-215", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -6.795169830322266, |
| "scores": { |
| "A": -22.99382781982422, |
| "B": -18.132843017578125, |
| "C": -16.198657989501953, |
| "D": -22.130014419555664, |
| "E": -22.1824893951416 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.6526165008544922, |
| "scores": { |
| "A": -8.266807556152344, |
| "B": -9.545059204101562, |
| "C": -8.919424057006836, |
| "D": -11.000604629516602, |
| "E": -11.521963119506836 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-216", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.6342000961303711, |
| "scores": { |
| "A": -12.792722702026367, |
| "B": -9.749557495117188, |
| "C": -10.383757591247559, |
| "D": -12.744852066040039, |
| "E": -12.79257583618164 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.935434341430664, |
| "scores": { |
| "A": -7.502462387084961, |
| "B": -9.681203842163086, |
| "C": -10.437896728515625, |
| "D": -10.083349227905273, |
| "E": -10.514593124389648 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-217", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.419353485107422, |
| "scores": { |
| "A": -12.823972702026367, |
| "B": -8.596864700317383, |
| "C": -12.047069549560547, |
| "D": -15.540777206420898, |
| "E": -12.016218185424805 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.360109329223633, |
| "scores": { |
| "A": -6.694489479064941, |
| "B": -7.449652671813965, |
| "C": -9.901532173156738, |
| "D": -10.334230422973633, |
| "E": -9.054598808288574 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-218", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.45700550079345703, |
| "scores": { |
| "A": -9.913926124572754, |
| "B": -10.370931625366211, |
| "C": -11.4942045211792, |
| "D": -12.226011276245117, |
| "E": -11.360005378723145 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 2.9241886138916016, |
| "scores": { |
| "A": -6.717060089111328, |
| "B": -9.64124870300293, |
| "C": -12.240556716918945, |
| "D": -12.319646835327148, |
| "E": -12.892836570739746 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-219", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.8197288513183594, |
| "scores": { |
| "A": -10.761848449707031, |
| "B": -10.766995429992676, |
| "C": -11.58157730102539, |
| "D": -11.309408187866211, |
| "E": -10.906158447265625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.418631553649902, |
| "scores": { |
| "A": -6.581605911254883, |
| "B": -10.751213073730469, |
| "C": -12.000237464904785, |
| "D": -12.113553047180176, |
| "E": -13.230367660522461 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-220", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.4230737686157227, |
| "scores": { |
| "A": -9.55500316619873, |
| "B": -10.829366683959961, |
| "C": -12.407928466796875, |
| "D": -12.210527420043945, |
| "E": -10.978076934814453 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -8.069978713989258, |
| "scores": { |
| "A": -6.391247272491455, |
| "B": -12.834989547729492, |
| "C": -14.329586029052734, |
| "D": -11.875961303710938, |
| "E": -14.461225509643555 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-221", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.1843414306640625, |
| "scores": { |
| "A": -13.276018142700195, |
| "B": -8.882017135620117, |
| "C": -8.697675704956055, |
| "D": -11.192451477050781, |
| "E": -12.547571182250977 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.23614788055419922, |
| "scores": { |
| "A": -7.982804298400879, |
| "B": -8.148253440856934, |
| "C": -7.680639266967773, |
| "D": -9.554062843322754, |
| "E": -7.916787147521973 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-222", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.8982372283935547, |
| "scores": { |
| "A": -10.80961799621582, |
| "B": -9.526689529418945, |
| "C": -12.049747467041016, |
| "D": -13.4249267578125, |
| "E": -14.212126731872559 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.658107757568359, |
| "scores": { |
| "A": -6.3620195388793945, |
| "B": -8.394012451171875, |
| "C": -11.750795364379883, |
| "D": -11.020127296447754, |
| "E": -11.600711822509766 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-223", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.090752601623535, |
| "scores": { |
| "A": -10.285022735595703, |
| "B": -7.155424118041992, |
| "C": -10.246176719665527, |
| "D": -10.93359375, |
| "E": -11.335384368896484 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.092008590698242, |
| "scores": { |
| "A": -4.6826276779174805, |
| "B": -8.774636268615723, |
| "C": -12.371101379394531, |
| "D": -11.170863151550293, |
| "E": -13.101846694946289 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-224", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.33624267578125, |
| "scores": { |
| "A": -9.653928756713867, |
| "B": -10.031352996826172, |
| "C": -10.045028686523438, |
| "D": -12.990171432495117, |
| "E": -13.378705978393555 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.7593231201171875, |
| "scores": { |
| "A": -6.229616165161133, |
| "B": -9.94646167755127, |
| "C": -11.709576606750488, |
| "D": -13.98893928527832, |
| "E": -14.078731536865234 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-225", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.9616122245788574, |
| "scores": { |
| "A": -9.889963150024414, |
| "B": -7.453649997711182, |
| "C": -8.71200942993164, |
| "D": -10.415262222290039, |
| "E": -10.100379943847656 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.348393440246582, |
| "scores": { |
| "A": -7.418603897094727, |
| "B": -8.728095054626465, |
| "C": -9.700243949890137, |
| "D": -8.766997337341309, |
| "E": -10.309947967529297 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-226", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.807805061340332, |
| "scores": { |
| "A": -9.130120277404785, |
| "B": -8.975384712219238, |
| "C": -8.7509126663208, |
| "D": -10.859039306640625, |
| "E": -9.558717727661133 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.761455535888672, |
| "scores": { |
| "A": -5.507261276245117, |
| "B": -8.399494171142578, |
| "C": -9.425889015197754, |
| "D": -8.896235466003418, |
| "E": -8.268716812133789 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-227", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.220867156982422, |
| "scores": { |
| "A": -14.620243072509766, |
| "B": -13.570455551147461, |
| "C": -11.349588394165039, |
| "D": -11.514875411987305, |
| "E": -11.799421310424805 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.755785942077637, |
| "scores": { |
| "A": -8.976465225219727, |
| "B": -13.732251167297363, |
| "C": -14.044622421264648, |
| "D": -10.583160400390625, |
| "E": -14.6570405960083 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-228", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.06908798217773438, |
| "scores": { |
| "A": -13.294260025024414, |
| "B": -10.70706558227539, |
| "C": -10.776153564453125, |
| "D": -14.082728385925293, |
| "E": -14.882830619812012 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.2796850204467773, |
| "scores": { |
| "A": -7.156650543212891, |
| "B": -8.436335563659668, |
| "C": -9.495584487915039, |
| "D": -10.117116928100586, |
| "E": -8.917889595031738 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-229", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -4.243276596069336, |
| "scores": { |
| "A": -13.26862907409668, |
| "B": -9.949518203735352, |
| "C": -14.192794799804688, |
| "D": -13.284774780273438, |
| "E": -10.620906829833984 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.551429748535156, |
| "scores": { |
| "A": -7.223015785217285, |
| "B": -8.682937622070312, |
| "C": -11.774445533752441, |
| "D": -8.85659408569336, |
| "E": -8.720208168029785 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-230", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.39587879180908203, |
| "scores": { |
| "A": -9.808959007263184, |
| "B": -9.706989288330078, |
| "C": -10.10286808013916, |
| "D": -11.030524253845215, |
| "E": -11.224230766296387 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.8247079849243164, |
| "scores": { |
| "A": -7.614650726318359, |
| "B": -10.1624174118042, |
| "C": -10.439358711242676, |
| "D": -8.709356307983398, |
| "E": -9.494927406311035 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-231", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -1.5809459686279297, |
| "scores": { |
| "A": -14.210000991821289, |
| "B": -11.604068756103516, |
| "C": -10.744209289550781, |
| "D": -14.023458480834961, |
| "E": -12.325155258178711 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.2639245986938477, |
| "scores": { |
| "A": -6.26756477355957, |
| "B": -7.757082939147949, |
| "C": -7.7208757400512695, |
| "D": -8.944690704345703, |
| "E": -8.531489372253418 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-232", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.394481658935547, |
| "scores": { |
| "A": -10.429929733276367, |
| "B": -8.23812484741211, |
| "C": -10.50958251953125, |
| "D": -10.852899551391602, |
| "E": -11.632606506347656 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.812300682067871, |
| "scores": { |
| "A": -6.170710563659668, |
| "B": -8.007887840270996, |
| "C": -9.030508041381836, |
| "D": -8.483603477478027, |
| "E": -10.983011245727539 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-233", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.781177520751953, |
| "scores": { |
| "A": -14.271334648132324, |
| "B": -11.490157127380371, |
| "C": -12.547819137573242, |
| "D": -12.209135055541992, |
| "E": -13.584470748901367 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.4316978454589844, |
| "scores": { |
| "A": -9.124231338500977, |
| "B": -9.750629425048828, |
| "C": -10.782835006713867, |
| "D": -9.555929183959961, |
| "E": -11.292634963989258 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-234", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.7564897537231445, |
| "scores": { |
| "A": -11.012960433959961, |
| "B": -9.256470680236816, |
| "C": -11.204137802124023, |
| "D": -11.356718063354492, |
| "E": -11.360950469970703 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 3.251865863800049, |
| "scores": { |
| "A": -5.590026378631592, |
| "B": -8.84189224243164, |
| "C": -12.897504806518555, |
| "D": -9.304344177246094, |
| "E": -11.349357604980469 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-235", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.0572433471679688, |
| "scores": { |
| "A": -11.001571655273438, |
| "B": -8.944328308105469, |
| "C": -10.169864654541016, |
| "D": -11.891605377197266, |
| "E": -10.417022705078125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.4765863418579102, |
| "scores": { |
| "A": -7.271252632141113, |
| "B": -8.747838973999023, |
| "C": -9.36155891418457, |
| "D": -10.69479751586914, |
| "E": -9.831280708312988 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-236", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.42516517639160156, |
| "scores": { |
| "A": -10.618110656738281, |
| "B": -10.19294548034668, |
| "C": -10.441062927246094, |
| "D": -13.496698379516602, |
| "E": -11.223958969116211 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 2.4833927154541016, |
| "scores": { |
| "A": -7.2196502685546875, |
| "B": -9.703042984008789, |
| "C": -9.82933235168457, |
| "D": -12.09399700164795, |
| "E": -12.26220989227295 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-237", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.568614959716797, |
| "scores": { |
| "A": -14.478540420532227, |
| "B": -14.94182014465332, |
| "C": -18.723243713378906, |
| "D": -17.047155380249023, |
| "E": -17.837533950805664 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.232480049133301, |
| "scores": { |
| "A": -8.632494926452637, |
| "B": -12.854435920715332, |
| "C": -15.726949691772461, |
| "D": -11.864974975585938, |
| "E": -14.554712295532227 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-238", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.7253379821777344, |
| "scores": { |
| "A": -12.960672378540039, |
| "B": -12.267729759216309, |
| "C": -12.900382995605469, |
| "D": -11.542391777038574, |
| "E": -13.208388328552246 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.6987123489379883, |
| "scores": { |
| "A": -8.835456848144531, |
| "B": -11.53416919708252, |
| "C": -12.405285835266113, |
| "D": -10.377281188964844, |
| "E": -12.166045188903809 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-239", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -1.5075511932373047, |
| "scores": { |
| "A": -10.213048934936523, |
| "B": -9.266622543334961, |
| "C": -8.705497741699219, |
| "D": -12.055715560913086, |
| "E": -11.688860893249512 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 3.3363800048828125, |
| "scores": { |
| "A": -6.8600263595581055, |
| "B": -10.196406364440918, |
| "C": -11.205855369567871, |
| "D": -13.323162078857422, |
| "E": -12.765695571899414 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-240", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.44985103607177734, |
| "scores": { |
| "A": -15.143648147583008, |
| "B": -14.28840446472168, |
| "C": -18.351917266845703, |
| "D": -14.222793579101562, |
| "E": -14.67264461517334 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.2502288818359375, |
| "scores": { |
| "A": -9.40713882446289, |
| "B": -11.652059555053711, |
| "C": -14.1944580078125, |
| "D": -11.099084854125977, |
| "E": -12.657367706298828 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-241", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -4.373614311218262, |
| "scores": { |
| "A": -10.808015823364258, |
| "B": -8.874855041503906, |
| "C": -10.419958114624023, |
| "D": -13.248469352722168, |
| "E": -12.38242244720459 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.841573715209961, |
| "scores": { |
| "A": -8.555763244628906, |
| "B": -12.585406303405762, |
| "C": -12.206342697143555, |
| "D": -12.397336959838867, |
| "E": -11.421467781066895 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-242", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.8799476623535156, |
| "scores": { |
| "A": -9.328302383422852, |
| "B": -10.89498519897461, |
| "C": -12.208250045776367, |
| "D": -12.965248107910156, |
| "E": -11.71044921875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.6321868896484375, |
| "scores": { |
| "A": -6.5676727294921875, |
| "B": -11.704992294311523, |
| "C": -14.199859619140625, |
| "D": -14.442285537719727, |
| "E": -14.39041519165039 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-243", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -6.371611595153809, |
| "scores": { |
| "A": -15.65360164642334, |
| "B": -9.281990051269531, |
| "C": -11.275196075439453, |
| "D": -13.858661651611328, |
| "E": -12.75399398803711 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.5444035530090332, |
| "scores": { |
| "A": -8.703460693359375, |
| "B": -7.159057140350342, |
| "C": -9.622814178466797, |
| "D": -11.823188781738281, |
| "E": -12.516307830810547 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-244", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.365335464477539, |
| "scores": { |
| "A": -13.74870491027832, |
| "B": -12.62894058227539, |
| "C": -11.383369445800781, |
| "D": -12.943933486938477, |
| "E": -12.694005966186523 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 3.0395584106445312, |
| "scores": { |
| "A": -6.322815895080566, |
| "B": -9.362374305725098, |
| "C": -10.0859956741333, |
| "D": -10.893784523010254, |
| "E": -12.42809772491455 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-245", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.3645496368408203, |
| "scores": { |
| "A": -11.810342788696289, |
| "B": -11.149602890014648, |
| "C": -11.514152526855469, |
| "D": -13.367142677307129, |
| "E": -13.141554832458496 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.503069877624512, |
| "scores": { |
| "A": -7.401782989501953, |
| "B": -13.384897232055664, |
| "C": -13.904852867126465, |
| "D": -14.09384822845459, |
| "E": -15.8029146194458 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-246", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -1.197403907775879, |
| "scores": { |
| "A": -13.818582534790039, |
| "B": -14.779563903808594, |
| "C": -15.105998992919922, |
| "D": -14.267341613769531, |
| "E": -13.582159996032715 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.314557075500488, |
| "scores": { |
| "A": -9.649901390075684, |
| "B": -13.964458465576172, |
| "C": -16.527318954467773, |
| "D": -12.236238479614258, |
| "E": -13.367414474487305 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-247", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -4.482232093811035, |
| "scores": { |
| "A": -15.076606750488281, |
| "B": -10.594374656677246, |
| "C": -13.674264907836914, |
| "D": -13.522222518920898, |
| "E": -13.801025390625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.8972492218017578, |
| "scores": { |
| "A": -9.801979064941406, |
| "B": -11.38330078125, |
| "C": -12.110631942749023, |
| "D": -10.699228286743164, |
| "E": -12.660514831542969 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-248", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -3.480362892150879, |
| "scores": { |
| "A": -14.004387855529785, |
| "B": -13.113205909729004, |
| "C": -13.897481918334961, |
| "D": -15.721760749816895, |
| "E": -12.241397857666016 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.1865768432617188, |
| "scores": { |
| "A": -8.702154159545898, |
| "B": -9.582113265991211, |
| "C": -11.823348999023438, |
| "D": -11.888731002807617, |
| "E": -9.788232803344727 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-249", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.821441650390625, |
| "scores": { |
| "A": -10.321834564208984, |
| "B": -8.848502159118652, |
| "C": -8.027060508728027, |
| "D": -11.628623962402344, |
| "E": -11.091792106628418 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.602280616760254, |
| "scores": { |
| "A": -6.007650375366211, |
| "B": -7.634098052978516, |
| "C": -8.609930992126465, |
| "D": -8.443798065185547, |
| "E": -8.685563087463379 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-250", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -1.9709901809692383, |
| "scores": { |
| "A": -10.511480331420898, |
| "B": -11.609233856201172, |
| "C": -10.008577346801758, |
| "D": -14.576160430908203, |
| "E": -11.979567527770996 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.598491668701172, |
| "scores": { |
| "A": -8.06104850769043, |
| "B": -11.577505111694336, |
| "C": -12.608949661254883, |
| "D": -14.756206512451172, |
| "E": -15.659540176391602 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-251", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.07524585723876953, |
| "scores": { |
| "A": -9.908409118652344, |
| "B": -9.833163261413574, |
| "C": -12.424334526062012, |
| "D": -11.275071144104004, |
| "E": -10.72103214263916 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.978281021118164, |
| "scores": { |
| "A": -9.318894386291504, |
| "B": -12.297175407409668, |
| "C": -13.513100624084473, |
| "D": -12.114720344543457, |
| "E": -11.161179542541504 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-252", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -0.18950843811035156, |
| "scores": { |
| "A": -10.37520980834961, |
| "B": -9.517382621765137, |
| "C": -9.654356002807617, |
| "D": -10.586039543151855, |
| "E": -9.464847564697266 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.239790916442871, |
| "scores": { |
| "A": -6.840622901916504, |
| "B": -8.500364303588867, |
| "C": -9.080413818359375, |
| "D": -9.140447616577148, |
| "E": -8.753503799438477 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-253", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.153468132019043, |
| "scores": { |
| "A": -12.344278335571289, |
| "B": -10.064801216125488, |
| "C": -10.924477577209473, |
| "D": -12.967808723449707, |
| "E": -13.218269348144531 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.156527519226074, |
| "scores": { |
| "A": -4.787992477416992, |
| "B": -6.941324234008789, |
| "C": -8.889881134033203, |
| "D": -9.577656745910645, |
| "E": -11.944519996643066 |
| } |
| } |
| } |
| ], |
| "flip_rows": [ |
| { |
| "ex_id": "aqua-test-2", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2555389404296875, |
| "scores": { |
| "A": -11.233211517333984, |
| "B": -10.210750579833984, |
| "C": -13.17569351196289, |
| "D": -12.437894821166992, |
| "E": -10.466289520263672 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.949008941650391, |
| "scores": { |
| "A": -6.06699275970459, |
| "B": -14.01600170135498, |
| "C": -17.137845993041992, |
| "D": -15.27363109588623, |
| "E": -15.64785099029541 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.3901653289794922, |
| "scores": { |
| "A": -9.876066207885742, |
| "B": -8.48590087890625, |
| "C": -10.311349868774414, |
| "D": -10.88787841796875, |
| "E": -10.712956428527832 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.25553417205810547, |
| "scores": { |
| "A": -11.233206748962402, |
| "B": -10.210748672485352, |
| "C": -13.175691604614258, |
| "D": -12.437891006469727, |
| "E": -10.466282844543457 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.25553417205810547, |
| "scores": { |
| "A": -11.233206748962402, |
| "B": -10.210748672485352, |
| "C": -13.175691604614258, |
| "D": -12.437891006469727, |
| "E": -10.466282844543457 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -9.454591751098633, |
| "scores": { |
| "A": -5.419614791870117, |
| "B": -14.87420654296875, |
| "C": -18.10893440246582, |
| "D": -16.861085891723633, |
| "E": -17.0190372467041 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.7489757537841797, |
| "scores": { |
| "A": -10.085409164428711, |
| "B": -8.336433410644531, |
| "C": -10.132183074951172, |
| "D": -10.820955276489258, |
| "E": -10.653312683105469 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.734705924987793, |
| "scores": { |
| "A": -4.145016670227051, |
| "B": -10.879722595214844, |
| "C": -13.257842063903809, |
| "D": -13.037062644958496, |
| "E": -12.864790916442871 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.94901180267334, |
| "scores": { |
| "A": -6.066986083984375, |
| "B": -14.015997886657715, |
| "C": -17.13784408569336, |
| "D": -15.273627281188965, |
| "E": -15.647849082946777 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-5", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953394889831543, |
| "scores": { |
| "A": -11.989723205566406, |
| "B": -10.97428035736084, |
| "C": -12.035185813903809, |
| "D": -11.961091041564941, |
| "E": -11.927675247192383 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.1877222061157227, |
| "scores": { |
| "A": -7.596570014953613, |
| "B": -9.784292221069336, |
| "C": -11.036355018615723, |
| "D": -9.200647354125977, |
| "E": -10.078826904296875 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.16080760955810547, |
| "scores": { |
| "A": -9.10636043548584, |
| "B": -8.253414154052734, |
| "C": -8.41422176361084, |
| "D": -9.317205429077148, |
| "E": -9.607017517089844 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.9533920288085938, |
| "scores": { |
| "A": -11.989713668823242, |
| "B": -10.974275588989258, |
| "C": -12.035181045532227, |
| "D": -11.96108627319336, |
| "E": -11.927667617797852 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.9533920288085938, |
| "scores": { |
| "A": -11.989713668823242, |
| "B": -10.974275588989258, |
| "C": -12.035181045532227, |
| "D": -11.96108627319336, |
| "E": -11.927667617797852 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.1618080139160156, |
| "scores": { |
| "A": -4.276651382446289, |
| "B": -5.438459396362305, |
| "C": -7.316925048828125, |
| "D": -5.728630065917969, |
| "E": -5.395031929016113 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.10317325592041016, |
| "scores": { |
| "A": -9.095845222473145, |
| "B": -8.302581787109375, |
| "C": -8.405755043029785, |
| "D": -9.327710151672363, |
| "E": -9.620680809020996 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.9572091102600098, |
| "scores": { |
| "A": -7.572333812713623, |
| "B": -10.529542922973633, |
| "C": -11.914779663085938, |
| "D": -11.758302688598633, |
| "E": -10.997583389282227 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.1877198219299316, |
| "scores": { |
| "A": -7.59656286239624, |
| "B": -9.784282684326172, |
| "C": -11.036344528198242, |
| "D": -9.200637817382812, |
| "E": -10.078821182250977 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-9", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.0833330154418945, |
| "scores": { |
| "A": -11.261035919189453, |
| "B": -8.873366355895996, |
| "C": -9.95669937133789, |
| "D": -12.33233642578125, |
| "E": -13.964797973632812 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.0129852294921875, |
| "scores": { |
| "A": -7.305376052856445, |
| "B": -11.318361282348633, |
| "C": -11.48718547821045, |
| "D": -13.66738224029541, |
| "E": -15.269938468933105 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1214942932128906, |
| "scores": { |
| "A": -10.059557914733887, |
| "B": -7.437822341918945, |
| "C": -8.559316635131836, |
| "D": -10.860220909118652, |
| "E": -12.348688125610352 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.0833320617675781, |
| "scores": { |
| "A": -11.26103401184082, |
| "B": -8.873364448547363, |
| "C": -9.956696510314941, |
| "D": -12.332334518432617, |
| "E": -13.964792251586914 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.0833320617675781, |
| "scores": { |
| "A": -11.26103401184082, |
| "B": -8.873364448547363, |
| "C": -9.956696510314941, |
| "D": -12.332334518432617, |
| "E": -13.964792251586914 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.571709632873535, |
| "scores": { |
| "A": -4.2528157234191895, |
| "B": -7.824525356292725, |
| "C": -7.8429999351501465, |
| "D": -10.935527801513672, |
| "E": -12.20101547241211 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.0738887786865234, |
| "scores": { |
| "A": -9.967976570129395, |
| "B": -7.351049423217773, |
| "C": -8.424938201904297, |
| "D": -10.737732887268066, |
| "E": -12.164700508117676 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.493661880493164, |
| "scores": { |
| "A": -6.91084098815918, |
| "B": -10.404502868652344, |
| "C": -11.570510864257812, |
| "D": -13.403081893920898, |
| "E": -13.38132095336914 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.012986183166504, |
| "scores": { |
| "A": -7.3053789138793945, |
| "B": -11.318365097045898, |
| "C": -11.487188339233398, |
| "D": -13.667381286621094, |
| "E": -15.269935607910156 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-15", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6604747772216797, |
| "scores": { |
| "A": -11.07632064819336, |
| "B": -10.41584587097168, |
| "C": -13.610551834106445, |
| "D": -15.297096252441406, |
| "E": -13.782489776611328 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.743229389190674, |
| "scores": { |
| "A": -6.1119704246521, |
| "B": -10.855199813842773, |
| "C": -11.251523971557617, |
| "D": -11.053302764892578, |
| "E": -13.566537857055664 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7353744506835938, |
| "scores": { |
| "A": -9.318084716796875, |
| "B": -8.582710266113281, |
| "C": -10.295574188232422, |
| "D": -11.628917694091797, |
| "E": -11.761164665222168 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6604728698730469, |
| "scores": { |
| "A": -11.076318740844727, |
| "B": -10.41584587097168, |
| "C": -13.610550880432129, |
| "D": -15.297094345092773, |
| "E": -13.782489776611328 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6604728698730469, |
| "scores": { |
| "A": -11.076318740844727, |
| "B": -10.41584587097168, |
| "C": -13.610550880432129, |
| "D": -15.297094345092773, |
| "E": -13.782489776611328 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.7481865882873535, |
| "scores": { |
| "A": -7.6179327964782715, |
| "B": -11.366119384765625, |
| "C": -11.271610260009766, |
| "D": -11.955974578857422, |
| "E": -14.104389190673828 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.4087352752685547, |
| "scores": { |
| "A": -9.301450729370117, |
| "B": -8.892715454101562, |
| "C": -10.551101684570312, |
| "D": -11.64991569519043, |
| "E": -11.84024429321289 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.2450222969055176, |
| "scores": { |
| "A": -5.506385326385498, |
| "B": -8.751407623291016, |
| "C": -10.76029109954834, |
| "D": -10.876399040222168, |
| "E": -11.42264461517334 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.743227005004883, |
| "scores": { |
| "A": -6.111969947814941, |
| "B": -10.855196952819824, |
| "C": -11.251523971557617, |
| "D": -11.053295135498047, |
| "E": -13.566534042358398 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-16", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 2.796067237854004, |
| "scores": { |
| "A": -12.479905128479004, |
| "B": -10.507231712341309, |
| "C": -7.711164474487305, |
| "D": -12.827747344970703, |
| "E": -12.807977676391602 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.861086368560791, |
| "scores": { |
| "A": -7.834758281707764, |
| "B": -9.467061996459961, |
| "C": -8.695844650268555, |
| "D": -9.597942352294922, |
| "E": -11.696287155151367 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.3996143341064453, |
| "scores": { |
| "A": -10.024466514587402, |
| "B": -9.449155807495117, |
| "C": -8.049541473388672, |
| "D": -10.041764259338379, |
| "E": -11.3864164352417 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 2.7960658073425293, |
| "scores": { |
| "A": -12.479902267456055, |
| "B": -10.507226943969727, |
| "C": -7.711161136627197, |
| "D": -12.827741622924805, |
| "E": -12.807975769042969 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 2.7960658073425293, |
| "scores": { |
| "A": -12.479902267456055, |
| "B": -10.507226943969727, |
| "C": -7.711161136627197, |
| "D": -12.827741622924805, |
| "E": -12.807975769042969 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.8937911987304688, |
| "scores": { |
| "A": -9.395095825195312, |
| "B": -11.238801956176758, |
| "C": -11.288887023925781, |
| "D": -11.238336563110352, |
| "E": -13.501079559326172 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.4178781509399414, |
| "scores": { |
| "A": -9.956263542175293, |
| "B": -9.215209007263184, |
| "C": -7.797330856323242, |
| "D": -9.89533805847168, |
| "E": -11.219152450561523 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.8867502212524414, |
| "scores": { |
| "A": -6.518403053283691, |
| "B": -7.824748992919922, |
| "C": -7.405153274536133, |
| "D": -6.778932571411133, |
| "E": -8.86016845703125 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.8610877990722656, |
| "scores": { |
| "A": -7.83476448059082, |
| "B": -9.467066764831543, |
| "C": -8.695852279663086, |
| "D": -9.597952842712402, |
| "E": -11.696298599243164 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-21", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7691888809204102, |
| "scores": { |
| "A": -10.39490795135498, |
| "B": -9.62571907043457, |
| "C": -12.538268089294434, |
| "D": -12.220020294189453, |
| "E": -11.351235389709473 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.5374608039855957, |
| "scores": { |
| "A": -6.129680156707764, |
| "B": -9.66714096069336, |
| "C": -10.800978660583496, |
| "D": -10.67288875579834, |
| "E": -11.187762260437012 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.620218276977539, |
| "scores": { |
| "A": -9.997575759887695, |
| "B": -8.377357482910156, |
| "C": -10.22830867767334, |
| "D": -10.391581535339355, |
| "E": -11.034626960754395 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7691860198974609, |
| "scores": { |
| "A": -10.394902229309082, |
| "B": -9.625716209411621, |
| "C": -12.538259506225586, |
| "D": -12.220011711120605, |
| "E": -11.351226806640625 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7691860198974609, |
| "scores": { |
| "A": -10.394902229309082, |
| "B": -9.625716209411621, |
| "C": -12.538259506225586, |
| "D": -12.220011711120605, |
| "E": -11.351226806640625 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.422626972198486, |
| "scores": { |
| "A": -5.9301066398620605, |
| "B": -10.352733612060547, |
| "C": -10.554861068725586, |
| "D": -10.926000595092773, |
| "E": -12.400789260864258 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.7119264602661133, |
| "scores": { |
| "A": -10.018722534179688, |
| "B": -8.272918701171875, |
| "C": -9.984845161437988, |
| "D": -10.223934173583984, |
| "E": -10.815324783325195 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.364543914794922, |
| "scores": { |
| "A": -10.713298797607422, |
| "B": -13.077842712402344, |
| "C": -13.290660858154297, |
| "D": -12.429567337036133, |
| "E": -14.533975601196289 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.5374622344970703, |
| "scores": { |
| "A": -6.129676818847656, |
| "B": -9.667139053344727, |
| "C": -10.800968170166016, |
| "D": -10.67288589477539, |
| "E": -11.18775463104248 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-25", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.06520843505859375, |
| "scores": { |
| "A": -12.949111938476562, |
| "B": -12.246522903442383, |
| "C": -12.181314468383789, |
| "D": -12.397541046142578, |
| "E": -13.614669799804688 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.5791339874267578, |
| "scores": { |
| "A": -8.643856048583984, |
| "B": -10.894746780395508, |
| "C": -10.222990036010742, |
| "D": -9.472063064575195, |
| "E": -10.86764144897461 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.1788043975830078, |
| "scores": { |
| "A": -9.795440673828125, |
| "B": -8.286870002746582, |
| "C": -8.46567440032959, |
| "D": -9.396930694580078, |
| "E": -10.46731185913086 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.06521415710449219, |
| "scores": { |
| "A": -12.949119567871094, |
| "B": -12.246532440185547, |
| "C": -12.181318283081055, |
| "D": -12.397550582885742, |
| "E": -13.614681243896484 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.06521415710449219, |
| "scores": { |
| "A": -12.949119567871094, |
| "B": -12.246532440185547, |
| "C": -12.181318283081055, |
| "D": -12.397550582885742, |
| "E": -13.614681243896484 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.7965564727783203, |
| "scores": { |
| "A": -5.875622272491455, |
| "B": -8.843538284301758, |
| "C": -7.672178745269775, |
| "D": -7.943771839141846, |
| "E": -9.51207447052002 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.07470321655273438, |
| "scores": { |
| "A": -9.802331924438477, |
| "B": -8.519807815551758, |
| "C": -8.594511032104492, |
| "D": -9.518583297729492, |
| "E": -10.49337387084961 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.5046224594116211, |
| "scores": { |
| "A": -7.623855113983154, |
| "B": -8.038482666015625, |
| "C": -6.829197406768799, |
| "D": -7.33381986618042, |
| "E": -8.407615661621094 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.579136848449707, |
| "scores": { |
| "A": -8.643851280212402, |
| "B": -10.894744873046875, |
| "C": -10.22298812866211, |
| "D": -9.472061157226562, |
| "E": -10.86764144897461 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-33", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.2154502868652344, |
| "scores": { |
| "A": -17.279247283935547, |
| "B": -18.187232971191406, |
| "C": -16.063796997070312, |
| "D": -19.143869400024414, |
| "E": -19.470874786376953 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.4487724304199219, |
| "scores": { |
| "A": -9.145519256591797, |
| "B": -10.157659530639648, |
| "C": -9.594291687011719, |
| "D": -10.095281600952148, |
| "E": -10.523807525634766 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.9382915496826172, |
| "scores": { |
| "A": -10.902482032775879, |
| "B": -9.475619316101074, |
| "C": -8.537327766418457, |
| "D": -10.088809967041016, |
| "E": -11.045086860656738 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.2154521942138672, |
| "scores": { |
| "A": -17.27924156188965, |
| "B": -18.187225341796875, |
| "C": -16.06378936767578, |
| "D": -19.143863677978516, |
| "E": -19.470867156982422 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.2154521942138672, |
| "scores": { |
| "A": -17.27924156188965, |
| "B": -18.187225341796875, |
| "C": -16.06378936767578, |
| "D": -19.143863677978516, |
| "E": -19.470867156982422 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.7954683303833008, |
| "scores": { |
| "A": -6.031587600708008, |
| "B": -6.501745223999023, |
| "C": -6.827055931091309, |
| "D": -8.70362663269043, |
| "E": -9.60076904296875 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.8526973724365234, |
| "scores": { |
| "A": -10.536826133728027, |
| "B": -8.904891014099121, |
| "C": -8.052193641662598, |
| "D": -9.56973648071289, |
| "E": -10.570178031921387 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.2699460983276367, |
| "scores": { |
| "A": -6.845300674438477, |
| "B": -7.745532989501953, |
| "C": -10.115246772766113, |
| "D": -9.863080024719238, |
| "E": -9.416638374328613 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.44877052307128906, |
| "scores": { |
| "A": -9.145517349243164, |
| "B": -10.157659530639648, |
| "C": -9.594287872314453, |
| "D": -10.095277786254883, |
| "E": -10.523809432983398 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-39", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.704728126525879, |
| "scores": { |
| "A": -10.207995414733887, |
| "B": -11.912723541259766, |
| "C": -12.109935760498047, |
| "D": -14.276583671569824, |
| "E": -13.992156982421875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.4076976776123047, |
| "scores": { |
| "A": -10.194977760314941, |
| "B": -10.153923988342285, |
| "C": -9.787280082702637, |
| "D": -11.554168701171875, |
| "E": -10.806174278259277 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.2574920654296875, |
| "scores": { |
| "A": -9.42642593383789, |
| "B": -9.74307632446289, |
| "C": -9.168933868408203, |
| "D": -10.273834228515625, |
| "E": -9.463874816894531 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.7047252655029297, |
| "scores": { |
| "A": -10.207992553710938, |
| "B": -11.912717819213867, |
| "C": -12.109930038452148, |
| "D": -14.276582717895508, |
| "E": -13.99215316772461 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.7047252655029297, |
| "scores": { |
| "A": -10.207992553710938, |
| "B": -11.912717819213867, |
| "C": -12.109930038452148, |
| "D": -14.276582717895508, |
| "E": -13.99215316772461 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.4817485809326172, |
| "scores": { |
| "A": -8.8492431640625, |
| "B": -9.330991744995117, |
| "C": -9.828338623046875, |
| "D": -11.093040466308594, |
| "E": -10.96759033203125 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.15412521362304688, |
| "scores": { |
| "A": -9.60675048828125, |
| "B": -10.024761199951172, |
| "C": -9.452625274658203, |
| "D": -10.476186752319336, |
| "E": -9.598836898803711 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.17638206481933594, |
| "scores": { |
| "A": -9.201507568359375, |
| "B": -9.28551959991455, |
| "C": -9.025125503540039, |
| "D": -10.745898246765137, |
| "E": -10.075105667114258 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.4076995849609375, |
| "scores": { |
| "A": -10.194982528686523, |
| "B": -10.153924942016602, |
| "C": -9.787282943725586, |
| "D": -11.554170608520508, |
| "E": -10.806177139282227 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-47", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.20550537109375, |
| "scores": { |
| "A": -11.954267501831055, |
| "B": -12.503751754760742, |
| "C": -12.114371299743652, |
| "D": -13.045472145080566, |
| "E": -11.748762130737305 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.0157623291015625, |
| "scores": { |
| "A": -9.386420249938965, |
| "B": -11.835212707519531, |
| "C": -13.338075637817383, |
| "D": -12.148918151855469, |
| "E": -13.402182579040527 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.8217716217041016, |
| "scores": { |
| "A": -11.986827850341797, |
| "B": -12.437498092651367, |
| "C": -11.280074119567871, |
| "D": -12.387033462524414, |
| "E": -12.101845741271973 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.20550537109375, |
| "scores": { |
| "A": -11.954263687133789, |
| "B": -12.503748893737793, |
| "C": -12.114365577697754, |
| "D": -13.045466423034668, |
| "E": -11.748758316040039 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.20550537109375, |
| "scores": { |
| "A": -11.954263687133789, |
| "B": -12.503748893737793, |
| "C": -12.114365577697754, |
| "D": -13.045466423034668, |
| "E": -11.748758316040039 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.8673534393310547, |
| "scores": { |
| "A": -8.755905151367188, |
| "B": -11.388154983520508, |
| "C": -13.46282958984375, |
| "D": -11.259201049804688, |
| "E": -12.623258590698242 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.8881673812866211, |
| "scores": { |
| "A": -11.76992416381836, |
| "B": -12.467824935913086, |
| "C": -11.171812057495117, |
| "D": -12.422957420349121, |
| "E": -12.059979438781738 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.76485538482666, |
| "scores": { |
| "A": -6.755273818969727, |
| "B": -9.245403289794922, |
| "C": -11.986200332641602, |
| "D": -10.68335247039795, |
| "E": -10.520129203796387 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.0157623291015625, |
| "scores": { |
| "A": -9.386425018310547, |
| "B": -11.835214614868164, |
| "C": -13.338083267211914, |
| "D": -12.148921966552734, |
| "E": -13.40218734741211 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-52", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.04578971862792969, |
| "scores": { |
| "A": -12.882274627685547, |
| "B": -9.855215072631836, |
| "C": -9.901004791259766, |
| "D": -11.499755859375, |
| "E": -10.678110122680664 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.4922494888305664, |
| "scores": { |
| "A": -5.136632442474365, |
| "B": -5.628881931304932, |
| "C": -6.605200290679932, |
| "D": -6.88695764541626, |
| "E": -6.429419994354248 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.19986343383789062, |
| "scores": { |
| "A": -10.225756645202637, |
| "B": -6.857089042663574, |
| "C": -7.056952476501465, |
| "D": -9.577616691589355, |
| "E": -9.770869255065918 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.04578971862792969, |
| "scores": { |
| "A": -12.882272720336914, |
| "B": -9.855213165283203, |
| "C": -9.901002883911133, |
| "D": -11.499753952026367, |
| "E": -10.678108215332031 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.04578971862792969, |
| "scores": { |
| "A": -12.882272720336914, |
| "B": -9.855213165283203, |
| "C": -9.901002883911133, |
| "D": -11.499753952026367, |
| "E": -10.678108215332031 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.052666664123535156, |
| "scores": { |
| "A": -9.966214179992676, |
| "B": -9.754355430603027, |
| "C": -12.090447425842285, |
| "D": -10.788393020629883, |
| "E": -9.807022094726562 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.09168434143066406, |
| "scores": { |
| "A": -10.196533203125, |
| "B": -6.679924488067627, |
| "C": -6.771608829498291, |
| "D": -9.311458587646484, |
| "E": -9.494468688964844 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.650324821472168, |
| "scores": { |
| "A": -10.846677780151367, |
| "B": -11.497002601623535, |
| "C": -15.689571380615234, |
| "D": -14.434303283691406, |
| "E": -13.255485534667969 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.4922494888305664, |
| "scores": { |
| "A": -5.136631488800049, |
| "B": -5.628880977630615, |
| "C": -6.605197429656982, |
| "D": -6.886956691741943, |
| "E": -6.429421901702881 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-57", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.14557647705078125, |
| "scores": { |
| "A": -13.898555755615234, |
| "B": -12.992910385131836, |
| "C": -14.371723175048828, |
| "D": -14.158893585205078, |
| "E": -13.138486862182617 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.6548147201538086, |
| "scores": { |
| "A": -10.324930191040039, |
| "B": -12.979744911193848, |
| "C": -12.848653793334961, |
| "D": -12.86312484741211, |
| "E": -12.547582626342773 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.3462409973144531, |
| "scores": { |
| "A": -12.841978073120117, |
| "B": -12.433549880981445, |
| "C": -13.434564590454102, |
| "D": -13.763471603393555, |
| "E": -12.779790878295898 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.14557647705078125, |
| "scores": { |
| "A": -13.898560523986816, |
| "B": -12.992914199829102, |
| "C": -14.371731758117676, |
| "D": -14.158897399902344, |
| "E": -13.138490676879883 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.14557647705078125, |
| "scores": { |
| "A": -13.898560523986816, |
| "B": -12.992914199829102, |
| "C": -14.371731758117676, |
| "D": -14.158897399902344, |
| "E": -13.138490676879883 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.6947002410888672, |
| "scores": { |
| "A": -8.799111366271973, |
| "B": -10.49381160736084, |
| "C": -10.079784393310547, |
| "D": -11.49155044555664, |
| "E": -11.507024765014648 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.21564388275146484, |
| "scores": { |
| "A": -12.912389755249023, |
| "B": -12.696745872497559, |
| "C": -13.669480323791504, |
| "D": -13.951339721679688, |
| "E": -12.927538871765137 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.7318696975708008, |
| "scores": { |
| "A": -10.47717571258545, |
| "B": -12.20904541015625, |
| "C": -12.458807945251465, |
| "D": -13.046340942382812, |
| "E": -11.932548522949219 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.6548194885253906, |
| "scores": { |
| "A": -10.324928283691406, |
| "B": -12.979747772216797, |
| "C": -12.848652839660645, |
| "D": -12.863126754760742, |
| "E": -12.547584533691406 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-68", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6945219039916992, |
| "scores": { |
| "A": -11.65401840209961, |
| "B": -10.95949649810791, |
| "C": -11.869510650634766, |
| "D": -12.070514678955078, |
| "E": -12.618841171264648 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.171473503112793, |
| "scores": { |
| "A": -9.029966354370117, |
| "B": -11.20143985748291, |
| "C": -11.244144439697266, |
| "D": -11.500038146972656, |
| "E": -10.598958015441895 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.03911018371582031, |
| "scores": { |
| "A": -10.191217422485352, |
| "B": -9.777387619018555, |
| "C": -9.816497802734375, |
| "D": -11.066892623901367, |
| "E": -10.560269355773926 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6945209503173828, |
| "scores": { |
| "A": -11.654026985168457, |
| "B": -10.959506034851074, |
| "C": -11.869518280029297, |
| "D": -12.07052230834961, |
| "E": -12.61884593963623 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6945209503173828, |
| "scores": { |
| "A": -11.654026985168457, |
| "B": -10.959506034851074, |
| "C": -11.869518280029297, |
| "D": -12.07052230834961, |
| "E": -12.61884593963623 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.0571775436401367, |
| "scores": { |
| "A": -8.271219253540039, |
| "B": -10.328396797180176, |
| "C": -9.616171836853027, |
| "D": -10.940016746520996, |
| "E": -9.240631103515625 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.05071544647216797, |
| "scores": { |
| "A": -10.167059898376465, |
| "B": -9.610998153686523, |
| "C": -9.661713600158691, |
| "D": -11.012078285217285, |
| "E": -10.494278907775879 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.482576847076416, |
| "scores": { |
| "A": -6.736016750335693, |
| "B": -8.21859359741211, |
| "C": -8.19207763671875, |
| "D": -8.997150421142578, |
| "E": -7.911620616912842 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.1714725494384766, |
| "scores": { |
| "A": -9.029961585998535, |
| "B": -11.201434135437012, |
| "C": -11.244138717651367, |
| "D": -11.50003433227539, |
| "E": -10.598953247070312 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-78", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.0808143615722656, |
| "scores": { |
| "A": -12.794174194335938, |
| "B": -8.323003768920898, |
| "C": -11.403818130493164, |
| "D": -13.768218994140625, |
| "E": -13.847496032714844 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.647706031799316, |
| "scores": { |
| "A": -5.634004592895508, |
| "B": -10.281710624694824, |
| "C": -11.297346115112305, |
| "D": -12.075166702270508, |
| "E": -12.413890838623047 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.6711091995239258, |
| "scores": { |
| "A": -10.68982982635498, |
| "B": -7.3767290115356445, |
| "C": -9.04783821105957, |
| "D": -11.847247123718262, |
| "E": -11.526897430419922 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.0808143615722656, |
| "scores": { |
| "A": -12.794168472290039, |
| "B": -8.322998046875, |
| "C": -11.403812408447266, |
| "D": -13.768211364746094, |
| "E": -13.847491264343262 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.0808143615722656, |
| "scores": { |
| "A": -12.794168472290039, |
| "B": -8.322998046875, |
| "C": -11.403812408447266, |
| "D": -13.768211364746094, |
| "E": -13.847491264343262 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.6936893463134766, |
| "scores": { |
| "A": -5.412115097045898, |
| "B": -9.105804443359375, |
| "C": -10.826802253723145, |
| "D": -11.795136451721191, |
| "E": -9.426908493041992 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.8150687217712402, |
| "scores": { |
| "A": -11.064653396606445, |
| "B": -7.3195881843566895, |
| "C": -9.13465690612793, |
| "D": -11.857183456420898, |
| "E": -11.807918548583984 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.0817832946777344, |
| "scores": { |
| "A": -6.042266845703125, |
| "B": -8.12405014038086, |
| "C": -8.708685874938965, |
| "D": -9.721263885498047, |
| "E": -7.989676475524902 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.647706985473633, |
| "scores": { |
| "A": -5.633998870849609, |
| "B": -10.281705856323242, |
| "C": -11.297343254089355, |
| "D": -12.075161933898926, |
| "E": -12.413885116577148 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-87", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6314544677734375, |
| "scores": { |
| "A": -9.793952941894531, |
| "B": -9.162498474121094, |
| "C": -11.231021881103516, |
| "D": -12.002910614013672, |
| "E": -11.467964172363281 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.9217519760131836, |
| "scores": { |
| "A": -6.585877418518066, |
| "B": -9.50762939453125, |
| "C": -9.712257385253906, |
| "D": -9.212251663208008, |
| "E": -11.261186599731445 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.4228715896606445, |
| "scores": { |
| "A": -9.903858184814453, |
| "B": -8.480986595153809, |
| "C": -10.398112297058105, |
| "D": -10.929282188415527, |
| "E": -11.117732048034668 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6314525604248047, |
| "scores": { |
| "A": -9.793954849243164, |
| "B": -9.16250228881836, |
| "C": -11.231022834777832, |
| "D": -12.002912521362305, |
| "E": -11.467966079711914 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6314525604248047, |
| "scores": { |
| "A": -9.793954849243164, |
| "B": -9.16250228881836, |
| "C": -11.231022834777832, |
| "D": -12.002912521362305, |
| "E": -11.467966079711914 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.8357162475585938, |
| "scores": { |
| "A": -4.554224967956543, |
| "B": -7.389941215515137, |
| "C": -8.258695602416992, |
| "D": -7.952755928039551, |
| "E": -9.503331184387207 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1273679733276367, |
| "scores": { |
| "A": -9.8775634765625, |
| "B": -8.750195503234863, |
| "C": -10.547350883483887, |
| "D": -10.983156204223633, |
| "E": -11.062116622924805 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.535149574279785, |
| "scores": { |
| "A": -3.4644904136657715, |
| "B": -5.999639987945557, |
| "C": -6.94714879989624, |
| "D": -6.253420352935791, |
| "E": -7.40266752243042 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.921750068664551, |
| "scores": { |
| "A": -6.585874557495117, |
| "B": -9.507624626159668, |
| "C": -9.712258338928223, |
| "D": -9.212246894836426, |
| "E": -11.261183738708496 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-100", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.1513805389404297, |
| "scores": { |
| "A": -9.272323608398438, |
| "B": -9.739631652832031, |
| "C": -9.120943069458008, |
| "D": -10.063505172729492, |
| "E": -10.608749389648438 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.166820049285889, |
| "scores": { |
| "A": -4.778280735015869, |
| "B": -9.417329788208008, |
| "C": -10.945100784301758, |
| "D": -11.501747131347656, |
| "E": -13.226821899414062 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.42541027069091797, |
| "scores": { |
| "A": -8.251370429992676, |
| "B": -8.534682273864746, |
| "C": -7.825960159301758, |
| "D": -9.516815185546875, |
| "E": -10.607525825500488 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.1513843536376953, |
| "scores": { |
| "A": -9.272323608398438, |
| "B": -9.739627838134766, |
| "C": -9.120939254760742, |
| "D": -10.063497543334961, |
| "E": -10.608743667602539 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.1513843536376953, |
| "scores": { |
| "A": -9.272323608398438, |
| "B": -9.739627838134766, |
| "C": -9.120939254760742, |
| "D": -10.063497543334961, |
| "E": -10.608743667602539 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.7572784423828125, |
| "scores": { |
| "A": -5.2430009841918945, |
| "B": -9.474717140197754, |
| "C": -11.000279426574707, |
| "D": -10.994614601135254, |
| "E": -11.884474754333496 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.5618510246276855, |
| "scores": { |
| "A": -8.301986694335938, |
| "B": -8.291629791259766, |
| "C": -7.72977876663208, |
| "D": -9.342042922973633, |
| "E": -10.563093185424805 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.407781600952148, |
| "scores": { |
| "A": -5.431789398193359, |
| "B": -11.169084548950195, |
| "C": -11.839570999145508, |
| "D": -14.098028182983398, |
| "E": -14.046358108520508 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.166818618774414, |
| "scores": { |
| "A": -4.778277397155762, |
| "B": -9.417325019836426, |
| "C": -10.945096015930176, |
| "D": -11.501741409301758, |
| "E": -13.226816177368164 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-103", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1493282318115234, |
| "scores": { |
| "A": -9.748441696166992, |
| "B": -8.529296875, |
| "C": -9.693557739257812, |
| "D": -11.449222564697266, |
| "E": -9.678625106811523 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.7451763153076172, |
| "scores": { |
| "A": -7.868520736694336, |
| "B": -8.613697052001953, |
| "C": -10.544960975646973, |
| "D": -9.806873321533203, |
| "E": -8.439764022827148 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.2809290885925293, |
| "scores": { |
| "A": -8.969255447387695, |
| "B": -7.4534783363342285, |
| "C": -8.759740829467773, |
| "D": -10.289947509765625, |
| "E": -8.734407424926758 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1493244171142578, |
| "scores": { |
| "A": -9.748445510864258, |
| "B": -8.529302597045898, |
| "C": -9.693565368652344, |
| "D": -11.449226379394531, |
| "E": -9.678627014160156 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1493244171142578, |
| "scores": { |
| "A": -9.748445510864258, |
| "B": -8.529302597045898, |
| "C": -9.693565368652344, |
| "D": -11.449226379394531, |
| "E": -9.678627014160156 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -0.2049875259399414, |
| "scores": { |
| "A": -5.457864761352539, |
| "B": -5.467221260070801, |
| "C": -8.277172088623047, |
| "D": -7.82890510559082, |
| "E": -5.262233734130859 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.9715185165405273, |
| "scores": { |
| "A": -8.980949401855469, |
| "B": -7.786107063293457, |
| "C": -9.11854076385498, |
| "D": -10.403557777404785, |
| "E": -8.757625579833984 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -1.1303739547729492, |
| "scores": { |
| "A": -7.216065406799316, |
| "B": -7.767756462097168, |
| "C": -9.236971855163574, |
| "D": -8.464241981506348, |
| "E": -6.637382507324219 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.7451763153076172, |
| "scores": { |
| "A": -7.8685197830200195, |
| "B": -8.613696098327637, |
| "C": -10.544960021972656, |
| "D": -9.806873321533203, |
| "E": -8.439760208129883 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-105", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 3.0392799377441406, |
| "scores": { |
| "A": -11.515534400939941, |
| "B": -12.032148361206055, |
| "C": -8.4762544631958, |
| "D": -13.967401504516602, |
| "E": -13.267354011535645 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.133573055267334, |
| "scores": { |
| "A": -7.463276386260986, |
| "B": -9.80911636352539, |
| "C": -8.59684944152832, |
| "D": -13.382390975952148, |
| "E": -13.58960247039795 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 2.2149181365966797, |
| "scores": { |
| "A": -10.347264289855957, |
| "B": -10.250322341918945, |
| "C": -8.035404205322266, |
| "D": -12.775790214538574, |
| "E": -12.733001708984375 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 3.039278030395508, |
| "scores": { |
| "A": -11.515533447265625, |
| "B": -12.032148361206055, |
| "C": -8.476255416870117, |
| "D": -13.967406272888184, |
| "E": -13.267354965209961 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 3.039278030395508, |
| "scores": { |
| "A": -11.515533447265625, |
| "B": -12.032148361206055, |
| "C": -8.476255416870117, |
| "D": -13.967406272888184, |
| "E": -13.267354965209961 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.6724023818969727, |
| "scores": { |
| "A": -5.971221446990967, |
| "B": -7.549536228179932, |
| "C": -7.6436238288879395, |
| "D": -11.26807689666748, |
| "E": -11.341324806213379 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 2.1505050659179688, |
| "scores": { |
| "A": -10.598121643066406, |
| "B": -10.289155960083008, |
| "C": -8.138650894165039, |
| "D": -12.586099624633789, |
| "E": -12.69243049621582 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.6481122970581055, |
| "scores": { |
| "A": -6.949349403381348, |
| "B": -8.987531661987305, |
| "C": -8.597461700439453, |
| "D": -10.919259071350098, |
| "E": -11.619161605834961 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.1335716247558594, |
| "scores": { |
| "A": -7.4632720947265625, |
| "B": -9.809111595153809, |
| "C": -8.596843719482422, |
| "D": -13.382383346557617, |
| "E": -13.589597702026367 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-111", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.18841552734375, |
| "scores": { |
| "A": -9.808207511901855, |
| "B": -9.283623695373535, |
| "C": -9.472039222717285, |
| "D": -10.7572660446167, |
| "E": -11.43770980834961 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.6287879943847656, |
| "scores": { |
| "A": -8.439443588256836, |
| "B": -12.068231582641602, |
| "C": -12.49129867553711, |
| "D": -13.331933975219727, |
| "E": -14.553701400756836 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.03671741485595703, |
| "scores": { |
| "A": -9.123326301574707, |
| "B": -8.150065422058105, |
| "C": -8.113348007202148, |
| "D": -9.43490219116211, |
| "E": -9.247275352478027 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.1884136199951172, |
| "scores": { |
| "A": -9.808208465576172, |
| "B": -9.283626556396484, |
| "C": -9.472040176391602, |
| "D": -10.757265090942383, |
| "E": -11.43770980834961 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.1884136199951172, |
| "scores": { |
| "A": -9.808208465576172, |
| "B": -9.283626556396484, |
| "C": -9.472040176391602, |
| "D": -10.757265090942383, |
| "E": -11.43770980834961 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.1629467010498047, |
| "scores": { |
| "A": -8.84734058380127, |
| "B": -11.010287284851074, |
| "C": -11.342278480529785, |
| "D": -11.88167953491211, |
| "E": -12.936474800109863 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.01021575927734375, |
| "scores": { |
| "A": -8.867430686950684, |
| "B": -7.981822967529297, |
| "C": -7.992038726806641, |
| "D": -9.280956268310547, |
| "E": -9.115897178649902 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.627413272857666, |
| "scores": { |
| "A": -7.264729022979736, |
| "B": -9.892142295837402, |
| "C": -9.825416564941406, |
| "D": -10.610671997070312, |
| "E": -11.448965072631836 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.628787040710449, |
| "scores": { |
| "A": -8.43944263458252, |
| "B": -12.068229675292969, |
| "C": -12.491294860839844, |
| "D": -13.331932067871094, |
| "E": -14.55370044708252 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-116", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.7076244354248047, |
| "scores": { |
| "A": -12.037410736083984, |
| "B": -9.75387191772461, |
| "C": -11.461496353149414, |
| "D": -11.536352157592773, |
| "E": -11.817276000976562 |
| } |
| }, |
| "ablated": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.5289134979248047, |
| "scores": { |
| "A": -7.282122611999512, |
| "B": -7.493680000305176, |
| "C": -8.805983543395996, |
| "D": -6.964766502380371, |
| "E": -7.28157901763916 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.3713750839233398, |
| "scores": { |
| "A": -8.353381156921387, |
| "B": -6.982006072998047, |
| "C": -8.779082298278809, |
| "D": -8.85804271697998, |
| "E": -9.427443504333496 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.707632064819336, |
| "scores": { |
| "A": -12.037415504455566, |
| "B": -9.753875732421875, |
| "C": -11.461507797241211, |
| "D": -11.536357879638672, |
| "E": -11.817279815673828 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.707632064819336, |
| "scores": { |
| "A": -12.037415504455566, |
| "B": -9.753875732421875, |
| "C": -11.461507797241211, |
| "D": -11.536357879638672, |
| "E": -11.817279815673828 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -0.8397531509399414, |
| "scores": { |
| "A": -7.520603656768799, |
| "B": -6.9838643074035645, |
| "C": -9.986032485961914, |
| "D": -7.869058132171631, |
| "E": -6.144111156463623 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.511284351348877, |
| "scores": { |
| "A": -8.105411529541016, |
| "B": -6.594127178192139, |
| "C": -8.44879150390625, |
| "D": -8.556886672973633, |
| "E": -9.028934478759766 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.8564348220825195, |
| "scores": { |
| "A": -7.948397636413574, |
| "B": -7.091962814331055, |
| "C": -10.485963821411133, |
| "D": -10.730182647705078, |
| "E": -8.363138198852539 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.5289154052734375, |
| "scores": { |
| "A": -7.282122611999512, |
| "B": -7.493679046630859, |
| "C": -8.80598258972168, |
| "D": -6.964763641357422, |
| "E": -7.281576156616211 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-120", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.3595123291015625, |
| "scores": { |
| "A": -12.646347045898438, |
| "B": -10.183612823486328, |
| "C": -10.54312515258789, |
| "D": -11.979488372802734, |
| "E": -12.640970230102539 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.8485918045043945, |
| "scores": { |
| "A": -7.463525772094727, |
| "B": -12.312117576599121, |
| "C": -11.753535270690918, |
| "D": -12.008286476135254, |
| "E": -13.767097473144531 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.4900999069213867, |
| "scores": { |
| "A": -12.293493270874023, |
| "B": -9.351531982421875, |
| "C": -8.861432075500488, |
| "D": -10.692264556884766, |
| "E": -10.770162582397461 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.3595142364501953, |
| "scores": { |
| "A": -12.646347045898438, |
| "B": -10.183609008789062, |
| "C": -10.543123245239258, |
| "D": -11.979488372802734, |
| "E": -12.640968322753906 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.3595142364501953, |
| "scores": { |
| "A": -12.646347045898438, |
| "B": -10.183609008789062, |
| "C": -10.543123245239258, |
| "D": -11.979488372802734, |
| "E": -12.640968322753906 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.472604751586914, |
| "scores": { |
| "A": -9.625394821166992, |
| "B": -14.097999572753906, |
| "C": -14.751794815063477, |
| "D": -14.831222534179688, |
| "E": -15.224090576171875 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.44132232666015625, |
| "scores": { |
| "A": -12.334344863891602, |
| "B": -9.446332931518555, |
| "C": -9.005010604858398, |
| "D": -10.665205001831055, |
| "E": -10.99666976928711 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.0735368728637695, |
| "scores": { |
| "A": -5.083780288696289, |
| "B": -8.157317161560059, |
| "C": -8.276480674743652, |
| "D": -9.385171890258789, |
| "E": -10.254486083984375 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.848588466644287, |
| "scores": { |
| "A": -7.463529109954834, |
| "B": -12.312117576599121, |
| "C": -11.753534317016602, |
| "D": -12.008285522460938, |
| "E": -13.767098426818848 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-122", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6598358154296875, |
| "scores": { |
| "A": -11.076019287109375, |
| "B": -10.416183471679688, |
| "C": -13.238750457763672, |
| "D": -13.289159774780273, |
| "E": -13.489381790161133 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.3248538970947266, |
| "scores": { |
| "A": -6.911991119384766, |
| "B": -9.236845016479492, |
| "C": -12.405698776245117, |
| "D": -10.99496078491211, |
| "E": -12.164006233215332 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.3234901428222656, |
| "scores": { |
| "A": -10.238473892211914, |
| "B": -9.914983749389648, |
| "C": -12.03645133972168, |
| "D": -12.105175018310547, |
| "E": -13.415177345275879 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6598358154296875, |
| "scores": { |
| "A": -11.076021194458008, |
| "B": -10.41618537902832, |
| "C": -13.23875617980957, |
| "D": -13.289161682128906, |
| "E": -13.489385604858398 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6598358154296875, |
| "scores": { |
| "A": -11.076021194458008, |
| "B": -10.41618537902832, |
| "C": -13.23875617980957, |
| "D": -13.289161682128906, |
| "E": -13.489385604858398 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.418147087097168, |
| "scores": { |
| "A": -5.337198257446289, |
| "B": -7.755345344543457, |
| "C": -12.551668167114258, |
| "D": -11.829740524291992, |
| "E": -12.763933181762695 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2971000671386719, |
| "scores": { |
| "A": -10.380821228027344, |
| "B": -10.083721160888672, |
| "C": -12.148942947387695, |
| "D": -12.248394966125488, |
| "E": -13.505158424377441 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.0811538696289062, |
| "scores": { |
| "A": -4.878448963165283, |
| "B": -7.9596028327941895, |
| "C": -11.607043266296387, |
| "D": -9.50536823272705, |
| "E": -11.090916633605957 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.3248538970947266, |
| "scores": { |
| "A": -6.911995887756348, |
| "B": -9.236849784851074, |
| "C": -12.405706405639648, |
| "D": -10.994964599609375, |
| "E": -12.164009094238281 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-123", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.928288459777832, |
| "scores": { |
| "A": -12.817946434020996, |
| "B": -13.251622200012207, |
| "C": -10.08199405670166, |
| "D": -12.010282516479492, |
| "E": -12.828923225402832 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.036839485168457, |
| "scores": { |
| "A": -7.727773666381836, |
| "B": -10.925074577331543, |
| "C": -11.764613151550293, |
| "D": -11.528144836425781, |
| "E": -13.928091049194336 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.1131134033203125, |
| "scores": { |
| "A": -11.61458969116211, |
| "B": -11.275108337402344, |
| "C": -11.161994934082031, |
| "D": -11.986404418945312, |
| "E": -12.851778030395508 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.9282875061035156, |
| "scores": { |
| "A": -12.817957878112793, |
| "B": -13.251638412475586, |
| "C": -10.082003593444824, |
| "D": -12.01029109954834, |
| "E": -12.82893180847168 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.9282875061035156, |
| "scores": { |
| "A": -12.817957878112793, |
| "B": -13.251638412475586, |
| "C": -10.082003593444824, |
| "D": -12.01029109954834, |
| "E": -12.82893180847168 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.267435073852539, |
| "scores": { |
| "A": -8.1873779296875, |
| "B": -10.754793167114258, |
| "C": -12.454813003540039, |
| "D": -13.088325500488281, |
| "E": -14.197637557983398 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.0443572998046875, |
| "scores": { |
| "A": -11.579950332641602, |
| "B": -11.245656967163086, |
| "C": -11.201299667358398, |
| "D": -12.026390075683594, |
| "E": -12.818794250488281 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.751577854156494, |
| "scores": { |
| "A": -4.197388172149658, |
| "B": -8.354893684387207, |
| "C": -9.948966026306152, |
| "D": -9.155034065246582, |
| "E": -10.130256652832031 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.036839485168457, |
| "scores": { |
| "A": -7.7277727127075195, |
| "B": -10.925077438354492, |
| "C": -11.764612197875977, |
| "D": -11.528146743774414, |
| "E": -13.928092002868652 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-125", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.26287078857421875, |
| "scores": { |
| "A": -12.876455307006836, |
| "B": -12.006429672241211, |
| "C": -10.34354305267334, |
| "D": -10.606413841247559, |
| "E": -11.505398750305176 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.1885986328125, |
| "scores": { |
| "A": -6.909121513366699, |
| "B": -8.705928802490234, |
| "C": -10.0977201461792, |
| "D": -9.862305641174316, |
| "E": -10.177146911621094 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.24047374725341797, |
| "scores": { |
| "A": -9.58531379699707, |
| "B": -8.929466247558594, |
| "C": -9.169939994812012, |
| "D": -9.5785493850708, |
| "E": -10.480676651000977 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.26287078857421875, |
| "scores": { |
| "A": -12.876452445983887, |
| "B": -12.006429672241211, |
| "C": -10.34354305267334, |
| "D": -10.606413841247559, |
| "E": -11.50539779663086 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.26287078857421875, |
| "scores": { |
| "A": -12.876452445983887, |
| "B": -12.006429672241211, |
| "C": -10.34354305267334, |
| "D": -10.606413841247559, |
| "E": -11.50539779663086 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.153430461883545, |
| "scores": { |
| "A": -7.0063958168029785, |
| "B": -9.034759521484375, |
| "C": -10.159826278686523, |
| "D": -11.367905616760254, |
| "E": -11.336196899414062 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.06798362731933594, |
| "scores": { |
| "A": -9.642061233520508, |
| "B": -8.649129867553711, |
| "C": -8.717113494873047, |
| "D": -9.228824615478516, |
| "E": -10.170103073120117 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.090330123901367, |
| "scores": { |
| "A": -7.855500221252441, |
| "B": -8.707620620727539, |
| "C": -9.945830345153809, |
| "D": -13.191256523132324, |
| "E": -12.100184440612793 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.1885976791381836, |
| "scores": { |
| "A": -6.909127235412598, |
| "B": -8.7059326171875, |
| "C": -10.097724914550781, |
| "D": -9.862310409545898, |
| "E": -10.177148818969727 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-130", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.5736770629882812, |
| "scores": { |
| "A": -12.52768611907959, |
| "B": -11.624752044677734, |
| "C": -14.400633811950684, |
| "D": -11.051074981689453, |
| "E": -12.196588516235352 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.6399145126342773, |
| "scores": { |
| "A": -7.714714050292969, |
| "B": -7.738489151000977, |
| "C": -10.441914558410645, |
| "D": -8.354628562927246, |
| "E": -8.231303215026855 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.8715057373046875, |
| "scores": { |
| "A": -10.206192016601562, |
| "B": -8.503612518310547, |
| "C": -9.418848037719727, |
| "D": -9.375118255615234, |
| "E": -9.415210723876953 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.5736827850341797, |
| "scores": { |
| "A": -12.527690887451172, |
| "B": -11.624759674072266, |
| "C": -14.400640487670898, |
| "D": -11.051076889038086, |
| "E": -12.196598052978516 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.5736827850341797, |
| "scores": { |
| "A": -12.527690887451172, |
| "B": -11.624759674072266, |
| "C": -14.400640487670898, |
| "D": -11.051076889038086, |
| "E": -12.196598052978516 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.5013294219970703, |
| "scores": { |
| "A": -5.733486652374268, |
| "B": -5.340871334075928, |
| "C": -8.164079666137695, |
| "D": -6.842200756072998, |
| "E": -6.404123783111572 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.8385581970214844, |
| "scores": { |
| "A": -10.177495002746582, |
| "B": -8.628342628479004, |
| "C": -9.545053482055664, |
| "D": -9.466900825500488, |
| "E": -9.466954231262207 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.8849029541015625, |
| "scores": { |
| "A": -7.422325134277344, |
| "B": -7.01991081237793, |
| "C": -9.694180488586426, |
| "D": -8.904813766479492, |
| "E": -8.477489471435547 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.6399135589599609, |
| "scores": { |
| "A": -7.714714050292969, |
| "B": -7.738491058349609, |
| "C": -10.441915512084961, |
| "D": -8.35462760925293, |
| "E": -8.231302261352539 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-140", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.090902328491211, |
| "scores": { |
| "A": -12.17054557800293, |
| "B": -10.950679779052734, |
| "C": -12.478940963745117, |
| "D": -12.041582107543945, |
| "E": -12.825494766235352 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.0806522369384766, |
| "scores": { |
| "A": -8.631109237670898, |
| "B": -9.711761474609375, |
| "C": -10.810302734375, |
| "D": -10.214776992797852, |
| "E": -11.603350639343262 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1358776092529297, |
| "scores": { |
| "A": -9.581466674804688, |
| "B": -8.445589065551758, |
| "C": -9.826594352722168, |
| "D": -10.325157165527344, |
| "E": -11.036417961120605 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.0909004211425781, |
| "scores": { |
| "A": -12.17054557800293, |
| "B": -10.950679779052734, |
| "C": -12.478940963745117, |
| "D": -12.041580200195312, |
| "E": -12.825498580932617 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.0909004211425781, |
| "scores": { |
| "A": -12.17054557800293, |
| "B": -10.950679779052734, |
| "C": -12.478940963745117, |
| "D": -12.041580200195312, |
| "E": -12.825498580932617 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.6461381912231445, |
| "scores": { |
| "A": -8.256159782409668, |
| "B": -9.902297973632812, |
| "C": -11.741201400756836, |
| "D": -11.071731567382812, |
| "E": -12.105072975158691 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1649103164672852, |
| "scores": { |
| "A": -9.316244125366211, |
| "B": -8.151333808898926, |
| "C": -9.497949600219727, |
| "D": -10.047868728637695, |
| "E": -10.842238426208496 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.3181371688842773, |
| "scores": { |
| "A": -7.826546669006348, |
| "B": -9.144683837890625, |
| "C": -11.221202850341797, |
| "D": -10.469319343566895, |
| "E": -11.426633834838867 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.0806512832641602, |
| "scores": { |
| "A": -8.631110191345215, |
| "B": -9.711761474609375, |
| "C": -10.810298919677734, |
| "D": -10.214774131774902, |
| "E": -11.603349685668945 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-141", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.3972196578979492, |
| "scores": { |
| "A": -15.668845176696777, |
| "B": -14.022212028503418, |
| "C": -12.345376968383789, |
| "D": -12.742596626281738, |
| "E": -13.434144973754883 |
| } |
| }, |
| "ablated": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -1.4827747344970703, |
| "scores": { |
| "A": -9.032247543334961, |
| "B": -10.177014350891113, |
| "C": -9.580657005310059, |
| "D": -8.116410255432129, |
| "E": -8.097882270812988 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.9053430557250977, |
| "scores": { |
| "A": -12.772378921508789, |
| "B": -10.580516815185547, |
| "C": -8.90491771697998, |
| "D": -9.810260772705078, |
| "E": -9.986860275268555 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.3972187042236328, |
| "scores": { |
| "A": -15.668844223022461, |
| "B": -14.022214889526367, |
| "C": -12.345376968383789, |
| "D": -12.742595672607422, |
| "E": -13.43414306640625 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.3972187042236328, |
| "scores": { |
| "A": -15.668844223022461, |
| "B": -14.022214889526367, |
| "C": -12.345376968383789, |
| "D": -12.742595672607422, |
| "E": -13.43414306640625 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.9576740264892578, |
| "scores": { |
| "A": -8.790735244750977, |
| "B": -9.536684036254883, |
| "C": -9.24622917175293, |
| "D": -8.288555145263672, |
| "E": -8.68001937866211 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.9014320373535156, |
| "scores": { |
| "A": -12.741355895996094, |
| "B": -10.564189910888672, |
| "C": -8.901117324829102, |
| "D": -9.802549362182617, |
| "E": -9.944976806640625 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.398233413696289, |
| "scores": { |
| "A": -8.182514190673828, |
| "B": -11.277816772460938, |
| "C": -10.580747604370117, |
| "D": -9.102690696716309, |
| "E": -9.042513847351074 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -1.4827728271484375, |
| "scores": { |
| "A": -9.032241821289062, |
| "B": -10.177009582519531, |
| "C": -9.580652236938477, |
| "D": -8.116405487060547, |
| "E": -8.097879409790039 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-148", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.09283638000488281, |
| "scores": { |
| "A": -11.842838287353516, |
| "B": -8.686580657958984, |
| "C": -9.391075134277344, |
| "D": -8.593744277954102, |
| "E": -10.327585220336914 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.2917442321777344, |
| "scores": { |
| "A": -8.228094100952148, |
| "B": -9.228675842285156, |
| "C": -9.42142105102539, |
| "D": -8.519838333129883, |
| "E": -9.596782684326172 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.0341577529907227, |
| "scores": { |
| "A": -9.260769844055176, |
| "B": -6.733394622802734, |
| "C": -7.005693435668945, |
| "D": -7.767552375793457, |
| "E": -8.883651733398438 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.09284019470214844, |
| "scores": { |
| "A": -11.842844009399414, |
| "B": -8.686589241027832, |
| "C": -9.391081809997559, |
| "D": -8.593749046325684, |
| "E": -10.327591896057129 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.09284019470214844, |
| "scores": { |
| "A": -11.842844009399414, |
| "B": -8.686589241027832, |
| "C": -9.391081809997559, |
| "D": -8.593749046325684, |
| "E": -10.327591896057129 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.08560562133789062, |
| "scores": { |
| "A": -6.6732988357543945, |
| "B": -7.1317548751831055, |
| "C": -7.866713523864746, |
| "D": -6.758904457092285, |
| "E": -8.30843448638916 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.9941120147705078, |
| "scores": { |
| "A": -9.38866901397705, |
| "B": -6.864222526550293, |
| "C": -7.074477195739746, |
| "D": -7.858334541320801, |
| "E": -8.94621753692627 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.6664242744445801, |
| "scores": { |
| "A": -7.582589626312256, |
| "B": -8.015180587768555, |
| "C": -9.47393798828125, |
| "D": -8.249013900756836, |
| "E": -8.513860702514648 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.2917442321777344, |
| "scores": { |
| "A": -8.228096008300781, |
| "B": -9.228679656982422, |
| "C": -9.42142105102539, |
| "D": -8.519840240478516, |
| "E": -9.596784591674805 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-152", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6472129821777344, |
| "scores": { |
| "A": -12.141305923461914, |
| "B": -11.08128833770752, |
| "C": -11.728501319885254, |
| "D": -11.744885444641113, |
| "E": -11.734070777893066 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.606741905212402, |
| "scores": { |
| "A": -8.290619850158691, |
| "B": -12.897361755371094, |
| "C": -16.176721572875977, |
| "D": -13.130666732788086, |
| "E": -13.918773651123047 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7464790344238281, |
| "scores": { |
| "A": -10.325726509094238, |
| "B": -9.529410362243652, |
| "C": -10.725006103515625, |
| "D": -10.360553741455078, |
| "E": -10.27588939666748 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6472129821777344, |
| "scores": { |
| "A": -12.141304969787598, |
| "B": -11.081286430358887, |
| "C": -11.728499412536621, |
| "D": -11.744885444641113, |
| "E": -11.7340726852417 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6472129821777344, |
| "scores": { |
| "A": -12.141304969787598, |
| "B": -11.081286430358887, |
| "C": -11.728499412536621, |
| "D": -11.744885444641113, |
| "E": -11.7340726852417 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.180154800415039, |
| "scores": { |
| "A": -8.851917266845703, |
| "B": -12.032072067260742, |
| "C": -15.113250732421875, |
| "D": -13.466560363769531, |
| "E": -12.649953842163086 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.4894752502441406, |
| "scores": { |
| "A": -10.067070007324219, |
| "B": -9.577594757080078, |
| "C": -10.798778533935547, |
| "D": -10.269950866699219, |
| "E": -10.165655136108398 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.321539878845215, |
| "scores": { |
| "A": -8.855324745178223, |
| "B": -12.176864624023438, |
| "C": -15.197938919067383, |
| "D": -12.816364288330078, |
| "E": -12.050538063049316 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.606740951538086, |
| "scores": { |
| "A": -8.29061508178711, |
| "B": -12.897356033325195, |
| "C": -16.176713943481445, |
| "D": -13.13066291809082, |
| "E": -13.918767929077148 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-167", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 2.286722183227539, |
| "scores": { |
| "A": -13.319049835205078, |
| "B": -10.63465690612793, |
| "C": -12.921379089355469, |
| "D": -16.10821533203125, |
| "E": -14.74123764038086 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.3104257583618164, |
| "scores": { |
| "A": -8.05471420288086, |
| "B": -11.365139961242676, |
| "C": -15.134896278381348, |
| "D": -13.336740493774414, |
| "E": -14.394715309143066 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.7309093475341797, |
| "scores": { |
| "A": -11.87700366973877, |
| "B": -10.14609432220459, |
| "C": -11.895035743713379, |
| "D": -13.453927040100098, |
| "E": -13.269637107849121 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 2.286722183227539, |
| "scores": { |
| "A": -13.319046020507812, |
| "B": -10.634654998779297, |
| "C": -12.921377182006836, |
| "D": -16.10821533203125, |
| "E": -14.74123764038086 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 2.286722183227539, |
| "scores": { |
| "A": -13.319046020507812, |
| "B": -10.634654998779297, |
| "C": -12.921377182006836, |
| "D": -16.10821533203125, |
| "E": -14.74123764038086 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.7075023651123047, |
| "scores": { |
| "A": -8.526466369628906, |
| "B": -11.233968734741211, |
| "C": -16.785362243652344, |
| "D": -15.479930877685547, |
| "E": -15.917808532714844 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.8152027130126953, |
| "scores": { |
| "A": -12.013884544372559, |
| "B": -10.008508682250977, |
| "C": -11.823711395263672, |
| "D": -13.332377433776855, |
| "E": -13.145186424255371 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.2307591438293457, |
| "scores": { |
| "A": -7.303309917449951, |
| "B": -10.534069061279297, |
| "C": -14.878864288330078, |
| "D": -14.44310188293457, |
| "E": -14.893917083740234 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.3104257583618164, |
| "scores": { |
| "A": -8.054710388183594, |
| "B": -11.36513614654541, |
| "C": -15.13489055633545, |
| "D": -13.336731910705566, |
| "E": -14.394709587097168 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-178", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.44650745391845703, |
| "scores": { |
| "A": -13.206219673156738, |
| "B": -11.094629287719727, |
| "C": -12.79085922241211, |
| "D": -12.61279582977295, |
| "E": -10.64812183380127 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.734641075134277, |
| "scores": { |
| "A": -5.792222023010254, |
| "B": -10.40644359588623, |
| "C": -8.512224197387695, |
| "D": -10.881692886352539, |
| "E": -12.526863098144531 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.943204402923584, |
| "scores": { |
| "A": -9.805769920349121, |
| "B": -5.3867316246032715, |
| "C": -6.29595947265625, |
| "D": -9.170482635498047, |
| "E": -8.329936027526855 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.4465036392211914, |
| "scores": { |
| "A": -13.206205368041992, |
| "B": -11.094612121582031, |
| "C": -12.790840148925781, |
| "D": -12.612784385681152, |
| "E": -10.64810848236084 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.4465036392211914, |
| "scores": { |
| "A": -13.206205368041992, |
| "B": -11.094612121582031, |
| "C": -12.790840148925781, |
| "D": -12.612784385681152, |
| "E": -10.64810848236084 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.96022367477417, |
| "scores": { |
| "A": -5.559600353240967, |
| "B": -9.4379243850708, |
| "C": -8.867037773132324, |
| "D": -11.44845199584961, |
| "E": -10.519824028015137 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.127613067626953, |
| "scores": { |
| "A": -9.935117721557617, |
| "B": -5.414183616638184, |
| "C": -6.386631965637207, |
| "D": -9.185277938842773, |
| "E": -8.541796684265137 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.373246669769287, |
| "scores": { |
| "A": -6.259435176849365, |
| "B": -10.628332138061523, |
| "C": -10.076188087463379, |
| "D": -13.082308769226074, |
| "E": -12.632681846618652 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.734642028808594, |
| "scores": { |
| "A": -5.792219161987305, |
| "B": -10.406440734863281, |
| "C": -8.512224197387695, |
| "D": -10.881689071655273, |
| "E": -12.526861190795898 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-181", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7682161331176758, |
| "scores": { |
| "A": -9.629287719726562, |
| "B": -8.861071586608887, |
| "C": -11.832342147827148, |
| "D": -11.63463020324707, |
| "E": -10.680866241455078 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.5154037475585938, |
| "scores": { |
| "A": -7.565939903259277, |
| "B": -9.081343650817871, |
| "C": -10.455299377441406, |
| "D": -9.157304763793945, |
| "E": -9.032361030578613 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.180495262145996, |
| "scores": { |
| "A": -9.739990234375, |
| "B": -8.418399810791016, |
| "C": -9.598895072937012, |
| "D": -10.474803924560547, |
| "E": -9.851222038269043 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7682132720947266, |
| "scores": { |
| "A": -9.629287719726562, |
| "B": -8.861074447631836, |
| "C": -11.832342147827148, |
| "D": -11.634628295898438, |
| "E": -10.680864334106445 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7682132720947266, |
| "scores": { |
| "A": -9.629287719726562, |
| "B": -8.861074447631836, |
| "C": -11.832342147827148, |
| "D": -11.634628295898438, |
| "E": -10.680864334106445 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.6810026168823242, |
| "scores": { |
| "A": -4.672728061676025, |
| "B": -5.35373067855835, |
| "C": -7.516335964202881, |
| "D": -6.342844486236572, |
| "E": -4.79114294052124 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1485824584960938, |
| "scores": { |
| "A": -9.888175964355469, |
| "B": -8.739593505859375, |
| "C": -9.970766067504883, |
| "D": -10.718721389770508, |
| "E": -10.002328872680664 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.9188671112060547, |
| "scores": { |
| "A": -9.10055160522461, |
| "B": -10.019418716430664, |
| "C": -11.787355422973633, |
| "D": -10.639719009399414, |
| "E": -9.353466033935547 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.515404224395752, |
| "scores": { |
| "A": -7.565932750701904, |
| "B": -9.081336975097656, |
| "C": -10.455291748046875, |
| "D": -9.157295227050781, |
| "E": -9.032354354858398 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-183", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.4096593856811523, |
| "scores": { |
| "A": -12.662055969238281, |
| "B": -9.372528076171875, |
| "C": -10.782187461853027, |
| "D": -13.160992622375488, |
| "E": -13.141705513000488 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.7217111587524414, |
| "scores": { |
| "A": -8.667959213256836, |
| "B": -7.500253200531006, |
| "C": -6.7785420417785645, |
| "D": -9.29892349243164, |
| "E": -10.76202392578125 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6202316284179688, |
| "scores": { |
| "A": -10.945490837097168, |
| "B": -7.309451103210449, |
| "C": -7.929682731628418, |
| "D": -10.611489295959473, |
| "E": -11.70968246459961 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.409658432006836, |
| "scores": { |
| "A": -12.662059783935547, |
| "B": -9.372528076171875, |
| "C": -10.782186508178711, |
| "D": -13.160991668701172, |
| "E": -13.141706466674805 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.409658432006836, |
| "scores": { |
| "A": -12.662059783935547, |
| "B": -9.372528076171875, |
| "C": -10.782186508178711, |
| "D": -13.160991668701172, |
| "E": -13.141706466674805 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.8798198699951172, |
| "scores": { |
| "A": -10.624456405639648, |
| "B": -9.079212188720703, |
| "C": -8.199392318725586, |
| "D": -13.810981750488281, |
| "E": -14.749567031860352 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6436195373535156, |
| "scores": { |
| "A": -10.855729103088379, |
| "B": -7.0558881759643555, |
| "C": -7.699507713317871, |
| "D": -10.375727653503418, |
| "E": -11.54419231414795 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.7752676010131836, |
| "scores": { |
| "A": -7.476681232452393, |
| "B": -7.795263767242432, |
| "C": -7.019996166229248, |
| "D": -9.245484352111816, |
| "E": -9.37934684753418 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.7217121124267578, |
| "scores": { |
| "A": -8.667959213256836, |
| "B": -7.500255584716797, |
| "C": -6.778543472290039, |
| "D": -9.298927307128906, |
| "E": -10.762025833129883 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-189", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.20896244049072266, |
| "scores": { |
| "A": -14.54034423828125, |
| "B": -11.554760932922363, |
| "C": -11.811978340148926, |
| "D": -11.763723373413086, |
| "E": -13.348597526550293 |
| } |
| }, |
| "ablated": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.8642768859863281, |
| "scores": { |
| "A": -9.882810592651367, |
| "B": -10.419057846069336, |
| "C": -10.307378768920898, |
| "D": -9.554780960083008, |
| "E": -9.593378067016602 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.7312335968017578, |
| "scores": { |
| "A": -11.619770050048828, |
| "B": -9.795265197753906, |
| "C": -9.064031600952148, |
| "D": -9.905113220214844, |
| "E": -9.719362258911133 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.20896339416503906, |
| "scores": { |
| "A": -14.54034423828125, |
| "B": -11.554759979248047, |
| "C": -11.811981201171875, |
| "D": -11.763723373413086, |
| "E": -13.348596572875977 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.20896339416503906, |
| "scores": { |
| "A": -14.54034423828125, |
| "B": -11.554759979248047, |
| "C": -11.811981201171875, |
| "D": -11.763723373413086, |
| "E": -13.348596572875977 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.6974210739135742, |
| "scores": { |
| "A": -9.276986122131348, |
| "B": -9.103320121765137, |
| "C": -10.14097785949707, |
| "D": -8.405899047851562, |
| "E": -8.49357795715332 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.7965354919433594, |
| "scores": { |
| "A": -11.578865051269531, |
| "B": -9.813770294189453, |
| "C": -9.017234802246094, |
| "D": -9.923456192016602, |
| "E": -9.655179977416992 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -0.7022542953491211, |
| "scores": { |
| "A": -6.854315757751465, |
| "B": -6.292705535888672, |
| "C": -6.938782215118408, |
| "D": -5.983695030212402, |
| "E": -5.590451240539551 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.8642749786376953, |
| "scores": { |
| "A": -9.88280963897705, |
| "B": -10.419052124023438, |
| "C": -10.30737590789795, |
| "D": -9.554777145385742, |
| "E": -9.59337329864502 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-190", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.35360145568847656, |
| "scores": { |
| "A": -13.596860885620117, |
| "B": -10.771349906921387, |
| "C": -10.41774845123291, |
| "D": -13.349145889282227, |
| "E": -13.912391662597656 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.489037036895752, |
| "scores": { |
| "A": -6.3273138999938965, |
| "B": -9.247300148010254, |
| "C": -12.816350936889648, |
| "D": -10.787364959716797, |
| "E": -12.917289733886719 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.29494571685791016, |
| "scores": { |
| "A": -10.62340259552002, |
| "B": -9.846135139465332, |
| "C": -9.551189422607422, |
| "D": -10.956984519958496, |
| "E": -11.488529205322266 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.35360145568847656, |
| "scores": { |
| "A": -13.59686279296875, |
| "B": -10.771347999572754, |
| "C": -10.417746543884277, |
| "D": -13.34914493560791, |
| "E": -13.912391662597656 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.35360145568847656, |
| "scores": { |
| "A": -13.59686279296875, |
| "B": -10.771347999572754, |
| "C": -10.417746543884277, |
| "D": -13.34914493560791, |
| "E": -13.912391662597656 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.209604263305664, |
| "scores": { |
| "A": -7.986570358276367, |
| "B": -10.776678085327148, |
| "C": -14.196174621582031, |
| "D": -12.462160110473633, |
| "E": -14.763839721679688 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.43008899688720703, |
| "scores": { |
| "A": -10.530719757080078, |
| "B": -10.036431312561035, |
| "C": -9.606342315673828, |
| "D": -10.94388198852539, |
| "E": -11.484930038452148 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.7598161697387695, |
| "scores": { |
| "A": -8.101582527160645, |
| "B": -8.396411895751953, |
| "C": -12.861398696899414, |
| "D": -12.736745834350586, |
| "E": -13.218839645385742 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.489035606384277, |
| "scores": { |
| "A": -6.327314376831055, |
| "B": -9.247294425964355, |
| "C": -12.816349983215332, |
| "D": -10.787363052368164, |
| "E": -12.917287826538086 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-191", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.9574899673461914, |
| "scores": { |
| "A": -11.895600318908691, |
| "B": -10.9381103515625, |
| "C": -13.633337020874023, |
| "D": -14.099964141845703, |
| "E": -13.749225616455078 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.093125343322754, |
| "scores": { |
| "A": -6.434209823608398, |
| "B": -8.527335166931152, |
| "C": -11.775838851928711, |
| "D": -11.290367126464844, |
| "E": -12.324054718017578 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.0822925567626953, |
| "scores": { |
| "A": -8.277904510498047, |
| "B": -7.195611953735352, |
| "C": -8.55109977722168, |
| "D": -9.725017547607422, |
| "E": -9.788521766662598 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.9574871063232422, |
| "scores": { |
| "A": -11.895593643188477, |
| "B": -10.938106536865234, |
| "C": -13.63333511352539, |
| "D": -14.099959373474121, |
| "E": -13.749227523803711 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.9574871063232422, |
| "scores": { |
| "A": -11.895593643188477, |
| "B": -10.938106536865234, |
| "C": -13.63333511352539, |
| "D": -14.099959373474121, |
| "E": -13.749227523803711 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.307638168334961, |
| "scores": { |
| "A": -6.399979591369629, |
| "B": -7.70761775970459, |
| "C": -11.518457412719727, |
| "D": -11.318992614746094, |
| "E": -11.238313674926758 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.0426197052001953, |
| "scores": { |
| "A": -8.261636734008789, |
| "B": -7.219017028808594, |
| "C": -8.581559181213379, |
| "D": -9.666144371032715, |
| "E": -9.812707901000977 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.6853055953979492, |
| "scores": { |
| "A": -7.959956169128418, |
| "B": -9.645261764526367, |
| "C": -13.037406921386719, |
| "D": -12.513145446777344, |
| "E": -9.820344924926758 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.093125820159912, |
| "scores": { |
| "A": -6.434208393096924, |
| "B": -8.527334213256836, |
| "C": -11.775838851928711, |
| "D": -11.290367126464844, |
| "E": -12.324055671691895 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-206", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.18168067932128906, |
| "scores": { |
| "A": -11.602930068969727, |
| "B": -10.961795806884766, |
| "C": -11.143476486206055, |
| "D": -12.837438583374023, |
| "E": -14.00632095336914 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.0302047729492188, |
| "scores": { |
| "A": -8.013933181762695, |
| "B": -11.044137954711914, |
| "C": -12.337331771850586, |
| "D": -11.77204704284668, |
| "E": -14.158761024475098 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.8052225112915039, |
| "scores": { |
| "A": -11.124311447143555, |
| "B": -10.31908893585205, |
| "C": -11.636249542236328, |
| "D": -12.520262718200684, |
| "E": -14.125991821289062 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.18168067932128906, |
| "scores": { |
| "A": -11.60293197631836, |
| "B": -10.961797714233398, |
| "C": -11.143478393554688, |
| "D": -12.837438583374023, |
| "E": -14.006319046020508 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.18168067932128906, |
| "scores": { |
| "A": -11.60293197631836, |
| "B": -10.961797714233398, |
| "C": -11.143478393554688, |
| "D": -12.837438583374023, |
| "E": -14.006319046020508 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.3860421180725098, |
| "scores": { |
| "A": -6.778487682342529, |
| "B": -9.164529800415039, |
| "C": -12.009851455688477, |
| "D": -12.049808502197266, |
| "E": -13.598691940307617 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.9240913391113281, |
| "scores": { |
| "A": -11.181600570678711, |
| "B": -10.257509231567383, |
| "C": -11.613018035888672, |
| "D": -12.530416488647461, |
| "E": -13.934183120727539 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.196244239807129, |
| "scores": { |
| "A": -7.369185447692871, |
| "B": -9.5654296875, |
| "C": -11.989535331726074, |
| "D": -11.94742488861084, |
| "E": -13.421416282653809 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.0302047729492188, |
| "scores": { |
| "A": -8.013933181762695, |
| "B": -11.044137954711914, |
| "C": -12.337331771850586, |
| "D": -11.772050857543945, |
| "E": -14.15876579284668 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-212", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.515838623046875, |
| "scores": { |
| "A": -11.57960319519043, |
| "B": -9.604219436645508, |
| "C": -11.120058059692383, |
| "D": -11.739898681640625, |
| "E": -12.83167839050293 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.382899284362793, |
| "scores": { |
| "A": -4.981387138366699, |
| "B": -8.364286422729492, |
| "C": -11.265626907348633, |
| "D": -9.413225173950195, |
| "E": -11.893355369567871 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.4837512969970703, |
| "scores": { |
| "A": -9.99412727355957, |
| "B": -7.378774642944336, |
| "C": -8.862525939941406, |
| "D": -9.535578727722168, |
| "E": -9.807991981506348 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.5158329010009766, |
| "scores": { |
| "A": -11.579606056213379, |
| "B": -9.604227066040039, |
| "C": -11.120059967041016, |
| "D": -11.739900588989258, |
| "E": -12.831683158874512 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.5158329010009766, |
| "scores": { |
| "A": -11.579606056213379, |
| "B": -9.604227066040039, |
| "C": -11.120059967041016, |
| "D": -11.739900588989258, |
| "E": -12.831683158874512 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.0996170043945312, |
| "scores": { |
| "A": -5.803328514099121, |
| "B": -8.902945518493652, |
| "C": -12.642228126525879, |
| "D": -12.522542953491211, |
| "E": -14.157147407531738 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.5297069549560547, |
| "scores": { |
| "A": -10.015952110290527, |
| "B": -7.30755615234375, |
| "C": -8.837263107299805, |
| "D": -9.575658798217773, |
| "E": -9.859335899353027 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.132758140563965, |
| "scores": { |
| "A": -7.283851623535156, |
| "B": -9.416609764099121, |
| "C": -12.729516983032227, |
| "D": -10.042702674865723, |
| "E": -12.341903686523438 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.382904052734375, |
| "scores": { |
| "A": -4.981382369995117, |
| "B": -8.364286422729492, |
| "C": -11.26562213897705, |
| "D": -9.413222312927246, |
| "E": -11.893354415893555 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-223", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.090752601623535, |
| "scores": { |
| "A": -10.285022735595703, |
| "B": -7.155424118041992, |
| "C": -10.246176719665527, |
| "D": -10.93359375, |
| "E": -11.335384368896484 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.092008590698242, |
| "scores": { |
| "A": -4.6826276779174805, |
| "B": -8.774636268615723, |
| "C": -12.371101379394531, |
| "D": -11.170863151550293, |
| "E": -13.101846694946289 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 2.5026049613952637, |
| "scores": { |
| "A": -10.422887802124023, |
| "B": -5.899902820587158, |
| "C": -8.402507781982422, |
| "D": -9.963022232055664, |
| "E": -10.305240631103516 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.0907530784606934, |
| "scores": { |
| "A": -10.285022735595703, |
| "B": -7.155422687530518, |
| "C": -10.246175765991211, |
| "D": -10.933595657348633, |
| "E": -11.335386276245117 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.0907530784606934, |
| "scores": { |
| "A": -10.285022735595703, |
| "B": -7.155422687530518, |
| "C": -10.246175765991211, |
| "D": -10.933595657348633, |
| "E": -11.335386276245117 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.352473258972168, |
| "scores": { |
| "A": -4.42826509475708, |
| "B": -6.780738353729248, |
| "C": -12.333539009094238, |
| "D": -11.84121036529541, |
| "E": -13.286885261535645 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 2.4003238677978516, |
| "scores": { |
| "A": -10.343778610229492, |
| "B": -5.7316436767578125, |
| "C": -8.131967544555664, |
| "D": -9.640758514404297, |
| "E": -10.208199501037598 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.1807565689086914, |
| "scores": { |
| "A": -5.085258483886719, |
| "B": -8.26601505279541, |
| "C": -12.408848762512207, |
| "D": -11.484879493713379, |
| "E": -12.500157356262207 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.092006683349609, |
| "scores": { |
| "A": -4.682626724243164, |
| "B": -8.774633407592773, |
| "C": -12.371101379394531, |
| "D": -11.17086410522461, |
| "E": -13.101846694946289 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-228", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.06908798217773438, |
| "scores": { |
| "A": -13.294260025024414, |
| "B": -10.70706558227539, |
| "C": -10.776153564453125, |
| "D": -14.082728385925293, |
| "E": -14.882830619812012 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.2796850204467773, |
| "scores": { |
| "A": -7.156650543212891, |
| "B": -8.436335563659668, |
| "C": -9.495584487915039, |
| "D": -10.117116928100586, |
| "E": -8.917889595031738 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7942695617675781, |
| "scores": { |
| "A": -8.922301292419434, |
| "B": -6.567187309265137, |
| "C": -7.361456871032715, |
| "D": -9.64250659942627, |
| "E": -9.15162181854248 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.06908607482910156, |
| "scores": { |
| "A": -13.294256210327148, |
| "B": -10.70706558227539, |
| "C": -10.776151657104492, |
| "D": -14.082728385925293, |
| "E": -14.882831573486328 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.06908607482910156, |
| "scores": { |
| "A": -13.294256210327148, |
| "B": -10.70706558227539, |
| "C": -10.776151657104492, |
| "D": -14.082728385925293, |
| "E": -14.882831573486328 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.0041084289550781, |
| "scores": { |
| "A": -5.43384313583374, |
| "B": -6.437951564788818, |
| "C": -6.356510639190674, |
| "D": -9.189103126525879, |
| "E": -8.109259605407715 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.8438491821289062, |
| "scores": { |
| "A": -8.9219388961792, |
| "B": -6.8384809494018555, |
| "C": -7.682330131530762, |
| "D": -9.85693073272705, |
| "E": -9.25981616973877 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.8855304718017578, |
| "scores": { |
| "A": -6.823153495788574, |
| "B": -7.708683967590332, |
| "C": -9.504840850830078, |
| "D": -8.854315757751465, |
| "E": -7.114500999450684 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.2796845436096191, |
| "scores": { |
| "A": -7.156657695770264, |
| "B": -8.436342239379883, |
| "C": -9.495588302612305, |
| "D": -10.117122650146484, |
| "E": -8.917900085449219 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-249", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.821441650390625, |
| "scores": { |
| "A": -10.321834564208984, |
| "B": -8.848502159118652, |
| "C": -8.027060508728027, |
| "D": -11.628623962402344, |
| "E": -11.091792106628418 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.602280616760254, |
| "scores": { |
| "A": -6.007650375366211, |
| "B": -7.634098052978516, |
| "C": -8.609930992126465, |
| "D": -8.443798065185547, |
| "E": -8.685563087463379 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.3504180908203125, |
| "scores": { |
| "A": -8.986002922058105, |
| "B": -6.979213714599609, |
| "C": -7.329631805419922, |
| "D": -10.10708236694336, |
| "E": -9.747851371765137 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.8214454650878906, |
| "scores": { |
| "A": -10.321839332580566, |
| "B": -8.848505973815918, |
| "C": -8.027060508728027, |
| "D": -11.628629684448242, |
| "E": -11.091798782348633 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.8214454650878906, |
| "scores": { |
| "A": -10.321839332580566, |
| "B": -8.848505973815918, |
| "C": -8.027060508728027, |
| "D": -11.628629684448242, |
| "E": -11.091798782348633 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.610713005065918, |
| "scores": { |
| "A": -5.542219161987305, |
| "B": -7.004818916320801, |
| "C": -8.152932167053223, |
| "D": -8.9476957321167, |
| "E": -9.82064151763916 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.41672515869140625, |
| "scores": { |
| "A": -8.892544746398926, |
| "B": -6.766074180603027, |
| "C": -7.182799339294434, |
| "D": -9.836996078491211, |
| "E": -9.51023006439209 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.0196499824523926, |
| "scores": { |
| "A": -3.997561454772949, |
| "B": -5.764638423919678, |
| "C": -6.017211437225342, |
| "D": -5.872786045074463, |
| "E": -6.263749599456787 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.602281093597412, |
| "scores": { |
| "A": -6.007654666900635, |
| "B": -7.634103298187256, |
| "C": -8.609935760498047, |
| "D": -8.443801879882812, |
| "E": -8.685571670532227 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-251", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.07524585723876953, |
| "scores": { |
| "A": -9.908409118652344, |
| "B": -9.833163261413574, |
| "C": -12.424334526062012, |
| "D": -11.275071144104004, |
| "E": -10.72103214263916 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.978281021118164, |
| "scores": { |
| "A": -9.318894386291504, |
| "B": -12.297175407409668, |
| "C": -13.513100624084473, |
| "D": -12.114720344543457, |
| "E": -11.161179542541504 |
| } |
| }, |
| "patched_0": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.53338623046875, |
| "scores": { |
| "A": -9.37672233581543, |
| "B": -8.81983757019043, |
| "C": -10.16126823425293, |
| "D": -10.652963638305664, |
| "E": -9.35322380065918 |
| } |
| }, |
| "patched_01": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.07524681091308594, |
| "scores": { |
| "A": -9.90841007232666, |
| "B": -9.833163261413574, |
| "C": -12.424333572387695, |
| "D": -11.27507209777832, |
| "E": -10.72103500366211 |
| } |
| }, |
| "patched_full": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.07524681091308594, |
| "scores": { |
| "A": -9.90841007232666, |
| "B": -9.833163261413574, |
| "C": -12.424333572387695, |
| "D": -11.27507209777832, |
| "E": -10.72103500366211 |
| } |
| }, |
| "debug_max_abs_diff_patched01_vs_full": 0.0, |
| "control_rand_subspace": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.792086601257324, |
| "scores": { |
| "A": -7.997702598571777, |
| "B": -10.789789199829102, |
| "C": -12.683808326721191, |
| "D": -10.730910301208496, |
| "E": -9.873717308044434 |
| } |
| }, |
| "control_time_shuffled": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.42225170135498047, |
| "scores": { |
| "A": -9.392989158630371, |
| "B": -8.97073745727539, |
| "C": -10.319923400878906, |
| "D": -10.757732391357422, |
| "E": -9.409377098083496 |
| } |
| }, |
| "control_shared_randvec": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.0303096771240234, |
| "scores": { |
| "A": -10.767759323120117, |
| "B": -12.79806900024414, |
| "C": -13.778217315673828, |
| "D": -12.801124572753906, |
| "E": -11.79677963256836 |
| } |
| }, |
| "control_patch_nonshared": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.978278160095215, |
| "scores": { |
| "A": -9.31889533996582, |
| "B": -12.297173500061035, |
| "C": -13.513103485107422, |
| "D": -12.114721298217773, |
| "E": -11.16118049621582 |
| } |
| } |
| } |
| ] |
| } |