decodeshare / artifacts /patch_back /results /patch_results.json
Zishan-Shao's picture
Upload folder using huggingface_hub
aa0e435 verified
{
"meta": {
"model": "meta-llama/Llama-2-7b-chat-hf",
"device": "cuda",
"dtype": "fp32",
"layer": 10,
"task": "aqua",
"eval_meta": {
"subspace_split": null,
"eval_split": "test",
"available_splits": [
"train",
"test",
"validation"
],
"hf_id": "aqua_rat",
"options_prefix_stripped": true,
"force_answer_prefix": true
},
"candidate_labels": [
"A",
"B",
"C",
"D",
"E"
],
"candidate_text_style": "space_letter",
"candidate_token_lens": {
"A": 2,
"B": 2,
"C": 2,
"D": 2,
"E": 2
},
"max_candidate_token_len": 2,
"patch_windows": {
"steps_0": [
0
],
"steps_01": [
0,
1
],
"full_steps": [
0,
1
],
"note": "If steps_01 == full_steps then patched_01 == patched_full by design."
},
"add_special_tokens_prompt": true,
"seed": 123,
"Qs_path": "Q_shared_layer10.npy",
"Qs_shape": [
4096,
97
],
"n_scanned": 254,
"baseline_acc": 0.20866141732283464,
"baseline_correct_n": 53,
"ablated_acc": 0.2204724409448819,
"ablated_correct_n": 56,
"n_flips_total": 42,
"n_flips_used": 42,
"layers_path": "model.layers"
},
"summary_on_flips": {
"patched_0": {
"n": 42,
"rescued": 31,
"rescued_pct": 73.80952380952381,
"mean_dmargin": 3.3110272657303583,
"median_dmargin": 3.0882368087768555
},
"patched_01": {
"n": 42,
"rescued": 42,
"rescued_pct": 100.0,
"mean_dmargin": 3.6945105280194963,
"median_dmargin": 3.317805767059326
},
"patched_full": {
"n": 42,
"rescued": 42,
"rescued_pct": 100.0,
"mean_dmargin": 3.6945105280194963,
"median_dmargin": 3.317805767059326
},
"control_rand_subspace": {
"n": 42,
"rescued": 2,
"rescued_pct": 4.761904761904762,
"mean_dmargin": 0.2848027887798491,
"median_dmargin": 0.27750468254089355
},
"control_shared_randvec": {
"n": 42,
"rescued": 2,
"rescued_pct": 4.761904761904762,
"mean_dmargin": 0.38392406418209984,
"median_dmargin": 0.4009871482849121
},
"control_time_shuffled": {
"n": 42,
"rescued": 32,
"rescued_pct": 76.19047619047619,
"mean_dmargin": 3.2988027050381614,
"median_dmargin": 3.1241049766540527
},
"control_patch_nonshared": {
"n": 42,
"rescued": 0,
"rescued_pct": 0.0,
"mean_dmargin": 9.08261253720238e-08,
"median_dmargin": 0.0
}
},
"scan_rows": [
{
"ex_id": "aqua-test-0",
"gold": "B",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -0.5365619659423828,
"scores": {
"A": -9.953326225280762,
"B": -9.932822227478027,
"C": -9.396260261535645,
"D": -11.750316619873047,
"E": -11.375755310058594
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.246011734008789,
"scores": {
"A": -9.17184829711914,
"B": -10.41786003112793,
"C": -11.056268692016602,
"D": -11.206304550170898,
"E": -11.109382629394531
}
}
},
{
"ex_id": "aqua-test-1",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.03298187255859375,
"scores": {
"A": -12.888943672180176,
"B": -11.506059646606445,
"C": -11.539041519165039,
"D": -13.514416694641113,
"E": -13.277742385864258
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -5.448361873626709,
"scores": {
"A": -6.55993127822876,
"B": -11.09766960144043,
"C": -12.008293151855469,
"D": -11.017435073852539,
"E": -12.980535507202148
}
}
},
{
"ex_id": "aqua-test-2",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.2555389404296875,
"scores": {
"A": -11.233211517333984,
"B": -10.210750579833984,
"C": -13.17569351196289,
"D": -12.437894821166992,
"E": -10.466289520263672
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -7.949008941650391,
"scores": {
"A": -6.06699275970459,
"B": -14.01600170135498,
"C": -17.137845993041992,
"D": -15.27363109588623,
"E": -15.64785099029541
}
}
},
{
"ex_id": "aqua-test-3",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.1184234619140625,
"scores": {
"A": -12.673524856567383,
"B": -8.500896453857422,
"C": -9.619319915771484,
"D": -12.860542297363281,
"E": -15.163476943969727
}
},
"ablated": {
"pred_label": "B",
"correct": false,
"margin": -1.0571308135986328,
"scores": {
"A": -9.761144638061523,
"B": -9.180427551269531,
"C": -10.237558364868164,
"D": -11.047746658325195,
"E": -10.9005126953125
}
}
},
{
"ex_id": "aqua-test-4",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 0.8914222717285156,
"scores": {
"A": -10.94589900970459,
"B": -11.837321281433105,
"C": -13.137775421142578,
"D": -12.691411972045898,
"E": -12.272680282592773
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 1.8114051818847656,
"scores": {
"A": -8.195756912231445,
"B": -10.760305404663086,
"C": -11.914056777954102,
"D": -10.007162094116211,
"E": -10.895750045776367
}
}
},
{
"ex_id": "aqua-test-5",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.953394889831543,
"scores": {
"A": -11.989723205566406,
"B": -10.97428035736084,
"C": -12.035185813903809,
"D": -11.961091041564941,
"E": -11.927675247192383
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.1877222061157227,
"scores": {
"A": -7.596570014953613,
"B": -9.784292221069336,
"C": -11.036355018615723,
"D": -9.200647354125977,
"E": -10.078826904296875
}
}
},
{
"ex_id": "aqua-test-6",
"gold": "C",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -2.3978919982910156,
"scores": {
"A": -10.834028244018555,
"B": -12.190977096557617,
"C": -13.23192024230957,
"D": -14.303913116455078,
"E": -12.20677375793457
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -5.510491371154785,
"scores": {
"A": -7.008818626403809,
"B": -9.866249084472656,
"C": -12.519309997558594,
"D": -11.021146774291992,
"E": -11.009967803955078
}
}
},
{
"ex_id": "aqua-test-7",
"gold": "D",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.163623809814453,
"scores": {
"A": -10.552425384521484,
"B": -9.352998733520508,
"C": -8.957988739013672,
"D": -11.121612548828125,
"E": -10.957361221313477
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.8754777908325195,
"scores": {
"A": -6.72020149230957,
"B": -10.692610740661621,
"C": -10.54880428314209,
"D": -10.59567928314209,
"E": -12.170318603515625
}
}
},
{
"ex_id": "aqua-test-8",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.255800247192383,
"scores": {
"A": -13.03713607788086,
"B": -11.900215148925781,
"C": -14.156015396118164,
"D": -12.385900497436523,
"E": -14.474089622497559
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -8.033926010131836,
"scores": {
"A": -6.845177173614502,
"B": -11.928691864013672,
"C": -14.87910270690918,
"D": -11.124820709228516,
"E": -12.958259582519531
}
}
},
{
"ex_id": "aqua-test-9",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.0833330154418945,
"scores": {
"A": -11.261035919189453,
"B": -8.873366355895996,
"C": -9.95669937133789,
"D": -12.33233642578125,
"E": -13.964797973632812
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.0129852294921875,
"scores": {
"A": -7.305376052856445,
"B": -11.318361282348633,
"C": -11.48718547821045,
"D": -13.66738224029541,
"E": -15.269938468933105
}
}
},
{
"ex_id": "aqua-test-10",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.20740604400634766,
"scores": {
"A": -11.915353775024414,
"B": -10.265600204467773,
"C": -13.313862800598145,
"D": -11.45443344116211,
"E": -10.473006248474121
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -7.0987701416015625,
"scores": {
"A": -7.162988662719727,
"B": -11.00853157043457,
"C": -14.745489120483398,
"D": -12.574932098388672,
"E": -14.261758804321289
}
}
},
{
"ex_id": "aqua-test-11",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.032149314880371,
"scores": {
"A": -11.705740928649902,
"B": -10.383201599121094,
"C": -12.561548233032227,
"D": -12.598165512084961,
"E": -12.415350914001465
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.6796650886535645,
"scores": {
"A": -6.977108478546143,
"B": -10.32332992553711,
"C": -13.413308143615723,
"D": -11.12005615234375,
"E": -13.656773567199707
}
}
},
{
"ex_id": "aqua-test-12",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.4920692443847656,
"scores": {
"A": -12.802289962768555,
"B": -9.024707794189453,
"C": -11.516777038574219,
"D": -11.572513580322266,
"E": -13.012077331542969
}
},
"ablated": {
"pred_label": "D",
"correct": false,
"margin": -3.181248664855957,
"scores": {
"A": -8.80724048614502,
"B": -9.62839126586914,
"C": -11.967851638793945,
"D": -8.786602973937988,
"E": -11.410276412963867
}
}
},
{
"ex_id": "aqua-test-13",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.793048858642578,
"scores": {
"A": -12.858366012573242,
"B": -9.46006965637207,
"C": -9.54768180847168,
"D": -12.253118515014648,
"E": -12.870738983154297
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.821455955505371,
"scores": {
"A": -6.2456769943237305,
"B": -9.579475402832031,
"C": -8.059391021728516,
"D": -10.067132949829102,
"E": -13.029922485351562
}
}
},
{
"ex_id": "aqua-test-14",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.6211061477661133,
"scores": {
"A": -12.599227905273438,
"B": -9.088578224182129,
"C": -10.55274772644043,
"D": -10.709684371948242,
"E": -9.514959335327148
}
},
"ablated": {
"pred_label": "D",
"correct": true,
"margin": 0.46077728271484375,
"scores": {
"A": -7.140524864196777,
"B": -8.438863754272461,
"C": -9.106466293334961,
"D": -6.679747581481934,
"E": -8.679572105407715
}
}
},
{
"ex_id": "aqua-test-15",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.6604747772216797,
"scores": {
"A": -11.07632064819336,
"B": -10.41584587097168,
"C": -13.610551834106445,
"D": -15.297096252441406,
"E": -13.782489776611328
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.743229389190674,
"scores": {
"A": -6.1119704246521,
"B": -10.855199813842773,
"C": -11.251523971557617,
"D": -11.053302764892578,
"E": -13.566537857055664
}
}
},
{
"ex_id": "aqua-test-16",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 2.796067237854004,
"scores": {
"A": -12.479905128479004,
"B": -10.507231712341309,
"C": -7.711164474487305,
"D": -12.827747344970703,
"E": -12.807977676391602
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.861086368560791,
"scores": {
"A": -7.834758281707764,
"B": -9.467061996459961,
"C": -8.695844650268555,
"D": -9.597942352294922,
"E": -11.696287155151367
}
}
},
{
"ex_id": "aqua-test-17",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.0560503005981445,
"scores": {
"A": -11.58172607421875,
"B": -10.525675773620605,
"C": -12.54155158996582,
"D": -12.84415054321289,
"E": -12.90414810180664
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 4.481626510620117,
"scores": {
"A": -7.197931289672852,
"B": -12.279987335205078,
"C": -14.135135650634766,
"D": -11.679557800292969,
"E": -15.510787963867188
}
}
},
{
"ex_id": "aqua-test-18",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.3985824584960938,
"scores": {
"A": -11.516077041625977,
"B": -10.966100692749023,
"C": -13.956039428710938,
"D": -12.364683151245117,
"E": -12.156122207641602
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.602712631225586,
"scores": {
"A": -7.199901580810547,
"B": -10.85714340209961,
"C": -14.79636001586914,
"D": -11.802614212036133,
"E": -15.477705001831055
}
}
},
{
"ex_id": "aqua-test-19",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -0.7771091461181641,
"scores": {
"A": -9.50829792022705,
"B": -9.048941612243652,
"C": -8.731188774108887,
"D": -10.535305976867676,
"E": -11.122632026672363
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 3.3860559463500977,
"scores": {
"A": -5.540165901184082,
"B": -9.812980651855469,
"C": -8.92622184753418,
"D": -10.782073974609375,
"E": -11.889626502990723
}
}
},
{
"ex_id": "aqua-test-20",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.8421554565429688,
"scores": {
"A": -10.2200345993042,
"B": -9.71203899383545,
"C": -9.828819274902344,
"D": -11.333096504211426,
"E": -11.554194450378418
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -7.804967880249023,
"scores": {
"A": -6.537824630737305,
"B": -9.35693645477295,
"C": -12.991019248962402,
"D": -10.14554500579834,
"E": -14.342792510986328
}
}
},
{
"ex_id": "aqua-test-21",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.7691888809204102,
"scores": {
"A": -10.39490795135498,
"B": -9.62571907043457,
"C": -12.538268089294434,
"D": -12.220020294189453,
"E": -11.351235389709473
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.5374608039855957,
"scores": {
"A": -6.129680156707764,
"B": -9.66714096069336,
"C": -10.800978660583496,
"D": -10.67288875579834,
"E": -11.187762260437012
}
}
},
{
"ex_id": "aqua-test-22",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.974935531616211,
"scores": {
"A": -9.757966995239258,
"B": -8.531810760498047,
"C": -11.770942687988281,
"D": -12.615520477294922,
"E": -11.506746292114258
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -7.905245304107666,
"scores": {
"A": -6.365554332733154,
"B": -10.006322860717773,
"C": -12.58491325378418,
"D": -13.247224807739258,
"E": -14.27079963684082
}
}
},
{
"ex_id": "aqua-test-23",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.1230697631835938,
"scores": {
"A": -9.921218872070312,
"B": -8.795772552490234,
"C": -10.068075180053711,
"D": -11.918842315673828,
"E": -10.69045639038086
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.665317535400391,
"scores": {
"A": -6.0819854736328125,
"B": -10.304386138916016,
"C": -12.034563064575195,
"D": -12.747303009033203,
"E": -13.464553833007812
}
}
},
{
"ex_id": "aqua-test-24",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.5330438613891602,
"scores": {
"A": -11.630638122558594,
"B": -10.715802192687988,
"C": -13.269601821899414,
"D": -11.420013427734375,
"E": -11.248846054077148
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.1090230941772461,
"scores": {
"A": -7.273715019226074,
"B": -7.602567672729492,
"C": -9.413043975830078,
"D": -7.838529586791992,
"E": -7.38273811340332
}
}
},
{
"ex_id": "aqua-test-25",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.06520843505859375,
"scores": {
"A": -12.949111938476562,
"B": -12.246522903442383,
"C": -12.181314468383789,
"D": -12.397541046142578,
"E": -13.614669799804688
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.5791339874267578,
"scores": {
"A": -8.643856048583984,
"B": -10.894746780395508,
"C": -10.222990036010742,
"D": -9.472063064575195,
"E": -10.86764144897461
}
}
},
{
"ex_id": "aqua-test-26",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -3.831747055053711,
"scores": {
"A": -12.59067440032959,
"B": -10.750125885009766,
"C": -8.758927345275879,
"D": -12.953798294067383,
"E": -10.869454383850098
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 0.6270813941955566,
"scores": {
"A": -7.691537380218506,
"B": -10.792871475219727,
"C": -8.318618774414062,
"D": -9.645444869995117,
"E": -11.4267578125
}
}
},
{
"ex_id": "aqua-test-27",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.8056774139404297,
"scores": {
"A": -10.92805290222168,
"B": -9.627052307128906,
"C": -10.676057815551758,
"D": -12.432729721069336,
"E": -12.006237030029297
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.3288869857788086,
"scores": {
"A": -8.168229103088379,
"B": -10.158320426940918,
"C": -11.378348350524902,
"D": -10.497116088867188,
"E": -11.294864654541016
}
}
},
{
"ex_id": "aqua-test-28",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 1.3396778106689453,
"scores": {
"A": -10.445246696472168,
"B": -11.784924507141113,
"C": -12.745423316955566,
"D": -13.060553550720215,
"E": -13.08572769165039
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 4.244240760803223,
"scores": {
"A": -9.513802528381348,
"B": -13.75804328918457,
"C": -14.599481582641602,
"D": -14.581493377685547,
"E": -13.772315979003906
}
}
},
{
"ex_id": "aqua-test-29",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.6846466064453125,
"scores": {
"A": -12.935235977172852,
"B": -10.250589370727539,
"C": -10.775646209716797,
"D": -10.991680145263672,
"E": -12.39478874206543
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 1.3767662048339844,
"scores": {
"A": -8.437541961669922,
"B": -10.61314868927002,
"C": -11.896113395690918,
"D": -9.814308166503906,
"E": -11.01830005645752
}
}
},
{
"ex_id": "aqua-test-30",
"gold": "D",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -4.338037490844727,
"scores": {
"A": -11.664570808410645,
"B": -11.412707328796387,
"C": -11.081539154052734,
"D": -15.419576644897461,
"E": -11.825617790222168
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.817980766296387,
"scores": {
"A": -6.548068046569824,
"B": -12.580531120300293,
"C": -14.352285385131836,
"D": -13.366048812866211,
"E": -12.250596046447754
}
}
},
{
"ex_id": "aqua-test-31",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.5094156265258789,
"scores": {
"A": -10.960805892944336,
"B": -9.841753005981445,
"C": -10.351168632507324,
"D": -10.83080768585205,
"E": -11.36546802520752
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -5.294549942016602,
"scores": {
"A": -6.641495704650879,
"B": -10.112987518310547,
"C": -11.93604564666748,
"D": -10.474628448486328,
"E": -9.680524826049805
}
}
},
{
"ex_id": "aqua-test-32",
"gold": "B",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -0.4654970169067383,
"scores": {
"A": -12.72982406616211,
"B": -10.999740600585938,
"C": -10.5342435836792,
"D": -12.491869926452637,
"E": -11.609811782836914
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.625300884246826,
"scores": {
"A": -6.746448993682861,
"B": -10.371749877929688,
"C": -10.17892074584961,
"D": -12.78900146484375,
"E": -13.765708923339844
}
}
},
{
"ex_id": "aqua-test-33",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 1.2154502868652344,
"scores": {
"A": -17.279247283935547,
"B": -18.187232971191406,
"C": -16.063796997070312,
"D": -19.143869400024414,
"E": -19.470874786376953
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.4487724304199219,
"scores": {
"A": -9.145519256591797,
"B": -10.157659530639648,
"C": -9.594291687011719,
"D": -10.095281600952148,
"E": -10.523807525634766
}
}
},
{
"ex_id": "aqua-test-34",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.934541702270508,
"scores": {
"A": -14.833627700805664,
"B": -11.353015899658203,
"C": -14.715003967285156,
"D": -14.287557601928711,
"E": -15.349594116210938
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -5.323911666870117,
"scores": {
"A": -9.540066719055176,
"B": -11.480504035949707,
"C": -14.825972557067871,
"D": -14.863978385925293,
"E": -18.370067596435547
}
}
},
{
"ex_id": "aqua-test-35",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.0334539413452148,
"scores": {
"A": -11.940654754638672,
"B": -10.587756156921387,
"C": -11.621210098266602,
"D": -11.252909660339355,
"E": -11.599471092224121
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.0078792572021484,
"scores": {
"A": -8.075170516967773,
"B": -10.238532066345215,
"C": -10.083049774169922,
"D": -9.30896282196045,
"E": -9.410977363586426
}
}
},
{
"ex_id": "aqua-test-36",
"gold": "E",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -1.7479333877563477,
"scores": {
"A": -9.8831205368042,
"B": -10.474870681762695,
"C": -10.734328269958496,
"D": -12.681618690490723,
"E": -11.631053924560547
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.574769020080566,
"scores": {
"A": -8.31888198852539,
"B": -12.751762390136719,
"C": -12.679941177368164,
"D": -14.572219848632812,
"E": -14.893651008605957
}
}
},
{
"ex_id": "aqua-test-37",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.9962596893310547,
"scores": {
"A": -11.241470336914062,
"B": -10.501060485839844,
"C": -13.315231323242188,
"D": -12.760457992553711,
"E": -11.497320175170898
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.8308868408203125,
"scores": {
"A": -9.542606353759766,
"B": -10.720484733581543,
"C": -12.237710952758789,
"D": -10.93893814086914,
"E": -10.373493194580078
}
}
},
{
"ex_id": "aqua-test-38",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.5876941680908203,
"scores": {
"A": -13.907208442687988,
"B": -12.088470458984375,
"C": -17.00652313232422,
"D": -16.789785385131836,
"E": -13.676164627075195
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.61981201171875,
"scores": {
"A": -8.97873306274414,
"B": -11.468414306640625,
"C": -14.408849716186523,
"D": -12.497419357299805,
"E": -12.59854507446289
}
}
},
{
"ex_id": "aqua-test-39",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 1.704728126525879,
"scores": {
"A": -10.207995414733887,
"B": -11.912723541259766,
"C": -12.109935760498047,
"D": -14.276583671569824,
"E": -13.992156982421875
}
},
"ablated": {
"pred_label": "C",
"correct": false,
"margin": -0.4076976776123047,
"scores": {
"A": -10.194977760314941,
"B": -10.153923988342285,
"C": -9.787280082702637,
"D": -11.554168701171875,
"E": -10.806174278259277
}
}
},
{
"ex_id": "aqua-test-40",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.1876583099365234,
"scores": {
"A": -9.934663772583008,
"B": -8.747005462646484,
"C": -8.951501846313477,
"D": -11.43099308013916,
"E": -10.340058326721191
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 2.8720083236694336,
"scores": {
"A": -6.977086067199707,
"B": -10.881498336791992,
"C": -10.488456726074219,
"D": -9.84909439086914,
"E": -9.965597152709961
}
}
},
{
"ex_id": "aqua-test-41",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.4858741760253906,
"scores": {
"A": -13.249530792236328,
"B": -12.764188766479492,
"C": -14.671175003051758,
"D": -16.250062942504883,
"E": -16.50116539001465
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.068471908569336,
"scores": {
"A": -8.645910263061523,
"B": -9.370107650756836,
"C": -10.64356803894043,
"D": -10.71438217163086,
"E": -10.385175704956055
}
}
},
{
"ex_id": "aqua-test-42",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.9857778549194336,
"scores": {
"A": -14.824063301086426,
"B": -12.47767448425293,
"C": -12.883535385131836,
"D": -13.463452339172363,
"E": -14.847708702087402
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.4033865928649902,
"scores": {
"A": -6.698444843292236,
"B": -11.148558616638184,
"C": -12.872434616088867,
"D": -10.101831436157227,
"E": -12.350337982177734
}
}
},
{
"ex_id": "aqua-test-43",
"gold": "D",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -0.739567756652832,
"scores": {
"A": -12.141595840454102,
"B": -11.716957092285156,
"C": -11.391490936279297,
"D": -12.131058692932129,
"E": -13.988408088684082
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.8107433319091797,
"scores": {
"A": -5.002326965332031,
"B": -7.882379531860352,
"C": -9.065218925476074,
"D": -6.813070297241211,
"E": -9.598858833312988
}
}
},
{
"ex_id": "aqua-test-44",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 0.3987007141113281,
"scores": {
"A": -10.112314224243164,
"B": -10.511014938354492,
"C": -10.519290924072266,
"D": -12.189737319946289,
"E": -15.004023551940918
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 3.4644508361816406,
"scores": {
"A": -7.429119110107422,
"B": -11.428091049194336,
"C": -14.222383499145508,
"D": -10.893569946289062,
"E": -14.403785705566406
}
}
},
{
"ex_id": "aqua-test-45",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -1.0466318130493164,
"scores": {
"A": -12.969017028808594,
"B": -12.31945514678955,
"C": -11.922385215759277,
"D": -12.34321403503418,
"E": -12.47985553741455
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 2.317699909210205,
"scores": {
"A": -7.05304479598999,
"B": -9.370744705200195,
"C": -10.5771484375,
"D": -9.985260009765625,
"E": -12.94332504272461
}
}
},
{
"ex_id": "aqua-test-46",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.7821559906005859,
"scores": {
"A": -12.967859268188477,
"B": -11.404945373535156,
"C": -11.63718032836914,
"D": -11.908271789550781,
"E": -12.187101364135742
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.7385272979736328,
"scores": {
"A": -7.029201507568359,
"B": -8.36732292175293,
"C": -10.496192932128906,
"D": -8.434120178222656,
"E": -8.767728805541992
}
}
},
{
"ex_id": "aqua-test-47",
"gold": "E",
"baseline": {
"pred_label": "E",
"correct": true,
"margin": 0.20550537109375,
"scores": {
"A": -11.954267501831055,
"B": -12.503751754760742,
"C": -12.114371299743652,
"D": -13.045472145080566,
"E": -11.748762130737305
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.0157623291015625,
"scores": {
"A": -9.386420249938965,
"B": -11.835212707519531,
"C": -13.338075637817383,
"D": -12.148918151855469,
"E": -13.402182579040527
}
}
},
{
"ex_id": "aqua-test-48",
"gold": "E",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.2838096618652344,
"scores": {
"A": -14.12697982788086,
"B": -14.659561157226562,
"C": -9.08004379272461,
"D": -10.326372146606445,
"E": -11.363853454589844
}
},
"ablated": {
"pred_label": "C",
"correct": false,
"margin": -1.4984474182128906,
"scores": {
"A": -7.6805315017700195,
"B": -8.929984092712402,
"C": -6.419025421142578,
"D": -7.822979927062988,
"E": -7.917472839355469
}
}
},
{
"ex_id": "aqua-test-49",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -4.129861831665039,
"scores": {
"A": -15.956474304199219,
"B": -11.82661247253418,
"C": -14.917438507080078,
"D": -13.226446151733398,
"E": -13.977205276489258
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 1.0888185501098633,
"scores": {
"A": -8.945561408996582,
"B": -10.034379959106445,
"C": -13.530162811279297,
"D": -10.250844955444336,
"E": -12.504024505615234
}
}
},
{
"ex_id": "aqua-test-50",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.279160499572754,
"scores": {
"A": -9.496601104736328,
"B": -8.565024375915527,
"C": -9.976577758789062,
"D": -10.844184875488281,
"E": -10.707534790039062
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.749849319458008,
"scores": {
"A": -7.051667213439941,
"B": -10.064657211303711,
"C": -12.560236930847168,
"D": -10.80151653289795,
"E": -13.33292007446289
}
}
},
{
"ex_id": "aqua-test-51",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.252643585205078,
"scores": {
"A": -11.461423873901367,
"B": -9.536659240722656,
"C": -9.648260116577148,
"D": -12.789302825927734,
"E": -11.993169784545898
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -5.026073455810547,
"scores": {
"A": -5.237083435058594,
"B": -8.581623077392578,
"C": -12.21021842956543,
"D": -10.26315689086914,
"E": -11.514408111572266
}
}
},
{
"ex_id": "aqua-test-52",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.04578971862792969,
"scores": {
"A": -12.882274627685547,
"B": -9.855215072631836,
"C": -9.901004791259766,
"D": -11.499755859375,
"E": -10.678110122680664
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.4922494888305664,
"scores": {
"A": -5.136632442474365,
"B": -5.628881931304932,
"C": -6.605200290679932,
"D": -6.88695764541626,
"E": -6.429419994354248
}
}
},
{
"ex_id": "aqua-test-53",
"gold": "D",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -4.019079208374023,
"scores": {
"A": -12.729389190673828,
"B": -12.427694320678711,
"C": -8.400447845458984,
"D": -12.419527053833008,
"E": -13.457754135131836
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.768967628479004,
"scores": {
"A": -4.923480033874512,
"B": -8.778578758239746,
"C": -8.191584587097168,
"D": -9.692447662353516,
"E": -10.092605590820312
}
}
},
{
"ex_id": "aqua-test-54",
"gold": "D",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.2229089736938477,
"scores": {
"A": -12.411125183105469,
"B": -12.760860443115234,
"C": -11.302736282348633,
"D": -13.52564525604248,
"E": -11.65049934387207
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -5.5538482666015625,
"scores": {
"A": -7.524580955505371,
"B": -12.95750904083252,
"C": -11.718106269836426,
"D": -13.078429222106934,
"E": -12.335714340209961
}
}
},
{
"ex_id": "aqua-test-55",
"gold": "B",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -0.8632240295410156,
"scores": {
"A": -10.496152877807617,
"B": -10.564685821533203,
"C": -9.701461791992188,
"D": -13.170589447021484,
"E": -11.492547988891602
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.287240982055664,
"scores": {
"A": -8.019121170043945,
"B": -10.30636215209961,
"C": -11.232714653015137,
"D": -11.299230575561523,
"E": -13.430822372436523
}
}
},
{
"ex_id": "aqua-test-56",
"gold": "D",
"baseline": {
"pred_label": "E",
"correct": false,
"margin": -1.5651264190673828,
"scores": {
"A": -10.601175308227539,
"B": -11.313573837280273,
"C": -12.363874435424805,
"D": -12.14034652709961,
"E": -10.575220108032227
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.2690534591674805,
"scores": {
"A": -7.066210746765137,
"B": -8.967557907104492,
"C": -10.526098251342773,
"D": -8.335264205932617,
"E": -9.71631145477295
}
}
},
{
"ex_id": "aqua-test-57",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.14557647705078125,
"scores": {
"A": -13.898555755615234,
"B": -12.992910385131836,
"C": -14.371723175048828,
"D": -14.158893585205078,
"E": -13.138486862182617
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.6548147201538086,
"scores": {
"A": -10.324930191040039,
"B": -12.979744911193848,
"C": -12.848653793334961,
"D": -12.86312484741211,
"E": -12.547582626342773
}
}
},
{
"ex_id": "aqua-test-58",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.41909122467041,
"scores": {
"A": -15.519378662109375,
"B": -10.206266403198242,
"C": -13.625357627868652,
"D": -15.576879501342773,
"E": -14.738330841064453
}
},
"ablated": {
"pred_label": "B",
"correct": false,
"margin": -6.554704189300537,
"scores": {
"A": -10.922317504882812,
"B": -7.338093280792236,
"C": -13.892797470092773,
"D": -11.01749038696289,
"E": -13.093095779418945
}
}
},
{
"ex_id": "aqua-test-59",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.0286178588867188,
"scores": {
"A": -9.556885719299316,
"B": -8.528267860412598,
"C": -9.387777328491211,
"D": -11.924543380737305,
"E": -12.156147003173828
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 2.6055479049682617,
"scores": {
"A": -7.506214141845703,
"B": -10.111762046813965,
"C": -10.535852432250977,
"D": -12.113842010498047,
"E": -13.108339309692383
}
}
},
{
"ex_id": "aqua-test-60",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -4.278145790100098,
"scores": {
"A": -9.984968185424805,
"B": -9.964075088500977,
"C": -10.470348358154297,
"D": -14.242220878601074,
"E": -14.801360130310059
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.694246292114258,
"scores": {
"A": -5.662052154541016,
"B": -9.853955268859863,
"C": -11.739667892456055,
"D": -12.356298446655273,
"E": -13.508790016174316
}
}
},
{
"ex_id": "aqua-test-61",
"gold": "E",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -1.4795856475830078,
"scores": {
"A": -11.702659606933594,
"B": -13.152563095092773,
"C": -15.516944885253906,
"D": -12.736021041870117,
"E": -13.182245254516602
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.2732534408569336,
"scores": {
"A": -7.3774213790893555,
"B": -8.904997825622559,
"C": -12.49362564086914,
"D": -8.682700157165527,
"E": -9.650674819946289
}
}
},
{
"ex_id": "aqua-test-62",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -4.083756446838379,
"scores": {
"A": -12.819320678710938,
"B": -9.86478042602539,
"C": -8.735564231872559,
"D": -13.259029388427734,
"E": -14.102011680603027
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 2.9089107513427734,
"scores": {
"A": -5.8731231689453125,
"B": -8.782033920288086,
"C": -8.930816650390625,
"D": -11.697149276733398,
"E": -13.882165908813477
}
}
},
{
"ex_id": "aqua-test-63",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.828573226928711,
"scores": {
"A": -12.999687194824219,
"B": -11.171113967895508,
"C": -12.38466739654541,
"D": -15.068181991577148,
"E": -14.821438789367676
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 6.530241966247559,
"scores": {
"A": -9.71985149383545,
"B": -16.808002471923828,
"C": -17.539220809936523,
"D": -16.250093460083008,
"E": -17.91951560974121
}
}
},
{
"ex_id": "aqua-test-64",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.11572933197021484,
"scores": {
"A": -11.253127098083496,
"B": -9.855234146118164,
"C": -9.970963478088379,
"D": -11.471985816955566,
"E": -13.291877746582031
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.368386745452881,
"scores": {
"A": -7.864134311676025,
"B": -8.514406204223633,
"C": -10.232521057128906,
"D": -9.923612594604492,
"E": -10.108715057373047
}
}
},
{
"ex_id": "aqua-test-65",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.8041362762451172,
"scores": {
"A": -11.474063873291016,
"B": -10.669927597045898,
"C": -11.573220252990723,
"D": -11.306943893432617,
"E": -12.886905670166016
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 2.3487911224365234,
"scores": {
"A": -8.487371444702148,
"B": -10.836162567138672,
"C": -13.451092720031738,
"D": -12.694389343261719,
"E": -12.682896614074707
}
}
},
{
"ex_id": "aqua-test-66",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.141798973083496,
"scores": {
"A": -12.508940696716309,
"B": -10.861954689025879,
"C": -12.234925270080566,
"D": -11.884855270385742,
"E": -13.003753662109375
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.2710695266723633,
"scores": {
"A": -8.62102222442627,
"B": -10.894733428955078,
"C": -14.39864730834961,
"D": -10.672046661376953,
"E": -11.892091751098633
}
}
},
{
"ex_id": "aqua-test-67",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.341836929321289,
"scores": {
"A": -8.820962905883789,
"B": -8.544965744018555,
"C": -9.699121475219727,
"D": -9.886802673339844,
"E": -10.276521682739258
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.691999435424805,
"scores": {
"A": -6.632804870605469,
"B": -11.282808303833008,
"C": -13.191905975341797,
"D": -11.324804306030273,
"E": -13.604455947875977
}
}
},
{
"ex_id": "aqua-test-68",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.6945219039916992,
"scores": {
"A": -11.65401840209961,
"B": -10.95949649810791,
"C": -11.869510650634766,
"D": -12.070514678955078,
"E": -12.618841171264648
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.171473503112793,
"scores": {
"A": -9.029966354370117,
"B": -11.20143985748291,
"C": -11.244144439697266,
"D": -11.500038146972656,
"E": -10.598958015441895
}
}
},
{
"ex_id": "aqua-test-69",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.48809242248535156,
"scores": {
"A": -12.742959976196289,
"B": -10.583757400512695,
"C": -11.071849822998047,
"D": -14.073648452758789,
"E": -13.576339721679688
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.911890983581543,
"scores": {
"A": -6.22026252746582,
"B": -9.122340202331543,
"C": -13.132153511047363,
"D": -11.907660484313965,
"E": -11.635204315185547
}
}
},
{
"ex_id": "aqua-test-70",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.720950126647949,
"scores": {
"A": -12.35704231262207,
"B": -8.636092185974121,
"C": -13.832864761352539,
"D": -10.753250122070312,
"E": -13.392253875732422
}
},
"ablated": {
"pred_label": "B",
"correct": false,
"margin": -0.03596019744873047,
"scores": {
"A": -7.090466499328613,
"B": -7.054506301879883,
"C": -11.16434097290039,
"D": -7.422432899475098,
"E": -10.723372459411621
}
}
},
{
"ex_id": "aqua-test-71",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -3.9103527069091797,
"scores": {
"A": -14.14399528503418,
"B": -15.403404235839844,
"C": -10.233642578125,
"D": -13.802553176879883,
"E": -11.533794403076172
}
},
"ablated": {
"pred_label": "E",
"correct": false,
"margin": -0.41689586639404297,
"scores": {
"A": -8.586220741271973,
"B": -12.792287826538086,
"C": -9.270968437194824,
"D": -9.66348648071289,
"E": -8.16932487487793
}
}
},
{
"ex_id": "aqua-test-72",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.3486766815185547,
"scores": {
"A": -14.158398628234863,
"B": -12.339851379394531,
"C": -13.015729904174805,
"D": -13.451315879821777,
"E": -13.688528060913086
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.4561805725097656,
"scores": {
"A": -8.697513580322266,
"B": -9.179859161376953,
"C": -10.927139282226562,
"D": -9.569753646850586,
"E": -10.153694152832031
}
}
},
{
"ex_id": "aqua-test-73",
"gold": "E",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -6.728538513183594,
"scores": {
"A": -15.75730037689209,
"B": -13.720550537109375,
"C": -10.241283416748047,
"D": -14.861220359802246,
"E": -16.96982192993164
}
},
"ablated": {
"pred_label": "C",
"correct": false,
"margin": -4.603492736816406,
"scores": {
"A": -7.970151901245117,
"B": -8.63032341003418,
"C": -5.910724639892578,
"D": -10.074857711791992,
"E": -10.514217376708984
}
}
},
{
"ex_id": "aqua-test-74",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -1.4039888381958008,
"scores": {
"A": -12.737687110900879,
"B": -11.68657112121582,
"C": -11.333698272705078,
"D": -13.284832954406738,
"E": -14.690400123596191
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 0.2767963409423828,
"scores": {
"A": -7.744673728942871,
"B": -9.163191795349121,
"C": -8.021470069885254,
"D": -8.26796817779541,
"E": -8.809639930725098
}
}
},
{
"ex_id": "aqua-test-75",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.457437515258789,
"scores": {
"A": -12.093403816223145,
"B": -8.635966300964355,
"C": -10.58320140838623,
"D": -12.374037742614746,
"E": -13.680496215820312
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 4.63665771484375,
"scores": {
"A": -6.7574872970581055,
"B": -11.394145011901855,
"C": -12.999401092529297,
"D": -11.796443939208984,
"E": -13.318641662597656
}
}
},
{
"ex_id": "aqua-test-76",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.5669078826904297,
"scores": {
"A": -13.41685676574707,
"B": -11.04054069519043,
"C": -11.60744857788086,
"D": -15.16108512878418,
"E": -14.487443923950195
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.9996509552001953,
"scores": {
"A": -10.679261207580566,
"B": -12.45645523071289,
"C": -12.678912162780762,
"D": -12.86469554901123,
"E": -14.037067413330078
}
}
},
{
"ex_id": "aqua-test-77",
"gold": "D",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.8945484161376953,
"scores": {
"A": -12.302556991577148,
"B": -10.522138595581055,
"C": -9.23642349243164,
"D": -12.130971908569336,
"E": -14.117457389831543
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -5.082538604736328,
"scores": {
"A": -6.24571418762207,
"B": -8.398621559143066,
"C": -11.599692344665527,
"D": -11.328252792358398,
"E": -12.078139305114746
}
}
},
{
"ex_id": "aqua-test-78",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 3.0808143615722656,
"scores": {
"A": -12.794174194335938,
"B": -8.323003768920898,
"C": -11.403818130493164,
"D": -13.768218994140625,
"E": -13.847496032714844
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.647706031799316,
"scores": {
"A": -5.634004592895508,
"B": -10.281710624694824,
"C": -11.297346115112305,
"D": -12.075166702270508,
"E": -12.413890838623047
}
}
},
{
"ex_id": "aqua-test-79",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.665989875793457,
"scores": {
"A": -11.933293342590332,
"B": -11.267303466796875,
"C": -12.420202255249023,
"D": -11.657835006713867,
"E": -12.877152442932129
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 1.082460880279541,
"scores": {
"A": -7.030532360076904,
"B": -8.325664520263672,
"C": -10.181509017944336,
"D": -8.112993240356445,
"E": -9.957942962646484
}
}
},
{
"ex_id": "aqua-test-80",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.5075702667236328,
"scores": {
"A": -10.707103729248047,
"B": -9.199533462524414,
"C": -10.327856063842773,
"D": -10.43326187133789,
"E": -11.281829833984375
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 1.147303581237793,
"scores": {
"A": -5.987746238708496,
"B": -7.135049819946289,
"C": -8.613941192626953,
"D": -7.509088516235352,
"E": -7.937631607055664
}
}
},
{
"ex_id": "aqua-test-81",
"gold": "E",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.7157487869262695,
"scores": {
"A": -12.188438415527344,
"B": -11.99388599395752,
"C": -10.619071006774902,
"D": -14.215484619140625,
"E": -13.334819793701172
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.729328155517578,
"scores": {
"A": -10.309041976928711,
"B": -13.062793731689453,
"C": -12.420219421386719,
"D": -12.16856575012207,
"E": -13.038370132446289
}
}
},
{
"ex_id": "aqua-test-82",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.778050422668457,
"scores": {
"A": -12.289058685302734,
"B": -9.830384254455566,
"C": -12.608434677124023,
"D": -13.991266250610352,
"E": -11.78373908996582
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.322303771972656,
"scores": {
"A": -6.604994773864746,
"B": -9.52199649810791,
"C": -10.927298545837402,
"D": -10.680765151977539,
"E": -9.880135536193848
}
}
},
{
"ex_id": "aqua-test-83",
"gold": "D",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -1.9063081741333008,
"scores": {
"A": -10.316740036010742,
"B": -9.180550575256348,
"C": -8.257037162780762,
"D": -10.163345336914062,
"E": -9.424388885498047
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.0506300926208496,
"scores": {
"A": -5.58945894241333,
"B": -7.652187824249268,
"C": -9.419204711914062,
"D": -8.64008903503418,
"E": -10.067176818847656
}
}
},
{
"ex_id": "aqua-test-84",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.371249198913574,
"scores": {
"A": -11.472586631774902,
"B": -10.989583015441895,
"C": -9.101337432861328,
"D": -9.797515869140625,
"E": -10.398811340332031
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 1.0994462966918945,
"scores": {
"A": -9.702713012695312,
"B": -11.570058822631836,
"C": -11.636595726013184,
"D": -10.802159309387207,
"E": -11.598857879638672
}
}
},
{
"ex_id": "aqua-test-85",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.0057201385498047,
"scores": {
"A": -11.025768280029297,
"B": -9.936981201171875,
"C": -9.020048141479492,
"D": -13.46237564086914,
"E": -13.570629119873047
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 3.6366686820983887,
"scores": {
"A": -6.244076251983643,
"B": -10.921756744384766,
"C": -9.880744934082031,
"D": -11.773923873901367,
"E": -14.056009292602539
}
}
},
{
"ex_id": "aqua-test-86",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.7587852478027344,
"scores": {
"A": -10.227313995361328,
"B": -8.468528747558594,
"C": -10.537178039550781,
"D": -12.026582717895508,
"E": -11.197158813476562
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 1.7784309387207031,
"scores": {
"A": -9.244726181030273,
"B": -11.023157119750977,
"C": -12.277462005615234,
"D": -12.193278312683105,
"E": -11.235006332397461
}
}
},
{
"ex_id": "aqua-test-87",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.6314544677734375,
"scores": {
"A": -9.793952941894531,
"B": -9.162498474121094,
"C": -11.231021881103516,
"D": -12.002910614013672,
"E": -11.467964172363281
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.9217519760131836,
"scores": {
"A": -6.585877418518066,
"B": -9.50762939453125,
"C": -9.712257385253906,
"D": -9.212251663208008,
"E": -11.261186599731445
}
}
},
{
"ex_id": "aqua-test-88",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.4125795364379883,
"scores": {
"A": -14.69003963470459,
"B": -12.324016571044922,
"C": -13.061227798461914,
"D": -13.971894264221191,
"E": -13.73659610748291
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.924210071563721,
"scores": {
"A": -5.407630443572998,
"B": -6.251180171966553,
"C": -7.470930576324463,
"D": -9.033124923706055,
"E": -10.331840515136719
}
}
},
{
"ex_id": "aqua-test-89",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.028618812561035,
"scores": {
"A": -10.8992919921875,
"B": -7.870673179626465,
"C": -10.651062965393066,
"D": -12.425169944763184,
"E": -11.295161247253418
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 1.749924659729004,
"scores": {
"A": -6.359846115112305,
"B": -8.109770774841309,
"C": -11.431536674499512,
"D": -9.726787567138672,
"E": -9.291827201843262
}
}
},
{
"ex_id": "aqua-test-90",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.4867076873779297,
"scores": {
"A": -11.009735107421875,
"B": -10.518085479736328,
"C": -12.004793167114258,
"D": -12.105035781860352,
"E": -13.80916976928711
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.5990352630615234,
"scores": {
"A": -8.198083877563477,
"B": -9.129544258117676,
"C": -10.797119140625,
"D": -9.568111419677734,
"E": -10.094569206237793
}
}
},
{
"ex_id": "aqua-test-91",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.3843660354614258,
"scores": {
"A": -12.006914138793945,
"B": -10.0424222946167,
"C": -11.426788330078125,
"D": -12.127811431884766,
"E": -10.665849685668945
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.6309232711791992,
"scores": {
"A": -8.031240463256836,
"B": -8.961100578308105,
"C": -9.662163734436035,
"D": -9.757013320922852,
"E": -9.014056205749512
}
}
},
{
"ex_id": "aqua-test-92",
"gold": "B",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -0.3693962097167969,
"scores": {
"A": -12.054543495178223,
"B": -12.42393970489502,
"C": -12.770564079284668,
"D": -14.098543167114258,
"E": -14.959080696105957
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.9901371002197266,
"scores": {
"A": -11.04039192199707,
"B": -15.030529022216797,
"C": -15.173776626586914,
"D": -13.621156692504883,
"E": -17.786663055419922
}
}
},
{
"ex_id": "aqua-test-93",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.8483829498291016,
"scores": {
"A": -13.06054401397705,
"B": -9.098247528076172,
"C": -9.98631477355957,
"D": -10.790071487426758,
"E": -10.946630477905273
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.748597145080566,
"scores": {
"A": -8.24567699432373,
"B": -12.128637313842773,
"C": -15.724483489990234,
"D": -13.707090377807617,
"E": -14.994274139404297
}
}
},
{
"ex_id": "aqua-test-94",
"gold": "E",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -1.9227781295776367,
"scores": {
"A": -12.715982437133789,
"B": -10.660414695739746,
"C": -10.040125846862793,
"D": -13.691793441772461,
"E": -11.96290397644043
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -5.577579975128174,
"scores": {
"A": -5.7996954917907715,
"B": -6.2772345542907715,
"C": -9.271978378295898,
"D": -10.277997970581055,
"E": -11.377275466918945
}
}
},
{
"ex_id": "aqua-test-95",
"gold": "E",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -3.9412012100219727,
"scores": {
"A": -11.20505428314209,
"B": -9.095925331115723,
"C": -8.017539978027344,
"D": -12.286678314208984,
"E": -11.958741188049316
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.4480342864990234,
"scores": {
"A": -8.222542762756348,
"B": -9.621024131774902,
"C": -8.630133628845215,
"D": -9.554940223693848,
"E": -9.670577049255371
}
}
},
{
"ex_id": "aqua-test-96",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -4.339990615844727,
"scores": {
"A": -9.821067810058594,
"B": -9.126599311828613,
"C": -13.028761863708496,
"D": -11.429372787475586,
"E": -13.46658992767334
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.383525371551514,
"scores": {
"A": -7.286087512969971,
"B": -8.687753677368164,
"C": -13.005938529968262,
"D": -10.64708423614502,
"E": -11.669612884521484
}
}
},
{
"ex_id": "aqua-test-97",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.8383378982543945,
"scores": {
"A": -14.469042778015137,
"B": -12.092732429504395,
"C": -13.835532188415527,
"D": -14.931070327758789,
"E": -13.803962707519531
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.630073547363281,
"scores": {
"A": -10.096877098083496,
"B": -10.369461059570312,
"C": -13.923606872558594,
"D": -14.726950645446777,
"E": -14.135393142700195
}
}
},
{
"ex_id": "aqua-test-98",
"gold": "D",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.004549980163574,
"scores": {
"A": -13.00085735321045,
"B": -10.797918319702148,
"C": -10.5806884765625,
"D": -12.585238456726074,
"E": -11.885275840759277
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.416612148284912,
"scores": {
"A": -4.055731296539307,
"B": -8.211648941040039,
"C": -10.580713272094727,
"D": -10.472343444824219,
"E": -12.015127182006836
}
}
},
{
"ex_id": "aqua-test-99",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.7006492614746094,
"scores": {
"A": -10.885625839233398,
"B": -9.20606803894043,
"C": -9.620462417602539,
"D": -11.3240966796875,
"E": -10.906717300415039
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -7.5970916748046875,
"scores": {
"A": -7.159132957458496,
"B": -8.3507661819458,
"C": -12.776918411254883,
"D": -12.628029823303223,
"E": -14.756224632263184
}
}
},
{
"ex_id": "aqua-test-100",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.1513805389404297,
"scores": {
"A": -9.272323608398438,
"B": -9.739631652832031,
"C": -9.120943069458008,
"D": -10.063505172729492,
"E": -10.608749389648438
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.166820049285889,
"scores": {
"A": -4.778280735015869,
"B": -9.417329788208008,
"C": -10.945100784301758,
"D": -11.501747131347656,
"E": -13.226821899414062
}
}
},
{
"ex_id": "aqua-test-101",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 0.3324604034423828,
"scores": {
"A": -10.100502014160156,
"B": -10.432962417602539,
"C": -11.973075866699219,
"D": -10.604475021362305,
"E": -12.458782196044922
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 4.506379127502441,
"scores": {
"A": -7.595697402954102,
"B": -12.102076530456543,
"C": -13.821539878845215,
"D": -12.81662654876709,
"E": -14.543049812316895
}
}
},
{
"ex_id": "aqua-test-102",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -4.450368881225586,
"scores": {
"A": -11.315129280090332,
"B": -10.445816993713379,
"C": -10.68630599975586,
"D": -14.12060832977295,
"E": -14.896185874938965
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -7.9963579177856445,
"scores": {
"A": -6.833308219909668,
"B": -10.518851280212402,
"C": -13.344768524169922,
"D": -13.985496520996094,
"E": -14.829666137695312
}
}
},
{
"ex_id": "aqua-test-103",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.1493282318115234,
"scores": {
"A": -9.748441696166992,
"B": -8.529296875,
"C": -9.693557739257812,
"D": -11.449222564697266,
"E": -9.678625106811523
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.7451763153076172,
"scores": {
"A": -7.868520736694336,
"B": -8.613697052001953,
"C": -10.544960975646973,
"D": -9.806873321533203,
"E": -8.439764022827148
}
}
},
{
"ex_id": "aqua-test-104",
"gold": "D",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -3.3600540161132812,
"scores": {
"A": -14.189347267150879,
"B": -11.361625671386719,
"C": -11.152084350585938,
"D": -14.512138366699219,
"E": -15.981123924255371
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.8950366973876953,
"scores": {
"A": -8.590035438537598,
"B": -10.608055114746094,
"C": -11.210797309875488,
"D": -11.485072135925293,
"E": -10.96902847290039
}
}
},
{
"ex_id": "aqua-test-105",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 3.0392799377441406,
"scores": {
"A": -11.515534400939941,
"B": -12.032148361206055,
"C": -8.4762544631958,
"D": -13.967401504516602,
"E": -13.267354011535645
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.133573055267334,
"scores": {
"A": -7.463276386260986,
"B": -9.80911636352539,
"C": -8.59684944152832,
"D": -13.382390975952148,
"E": -13.58960247039795
}
}
},
{
"ex_id": "aqua-test-106",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.9239311218261719,
"scores": {
"A": -10.124608039855957,
"B": -9.200676918029785,
"C": -9.467672348022461,
"D": -13.042096138000488,
"E": -13.135705947875977
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 3.8698911666870117,
"scores": {
"A": -7.598464012145996,
"B": -11.948047637939453,
"C": -13.422207832336426,
"D": -11.468355178833008,
"E": -14.042037010192871
}
}
},
{
"ex_id": "aqua-test-107",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.0206260681152344,
"scores": {
"A": -9.933467864990234,
"B": -8.912841796875,
"C": -11.042007446289062,
"D": -12.203380584716797,
"E": -10.170745849609375
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 3.180150032043457,
"scores": {
"A": -7.0703229904174805,
"B": -10.250473022460938,
"C": -11.684900283813477,
"D": -10.878337860107422,
"E": -11.952753067016602
}
}
},
{
"ex_id": "aqua-test-108",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.651386260986328,
"scores": {
"A": -12.283191680908203,
"B": -9.631805419921875,
"C": -12.84640121459961,
"D": -13.274940490722656,
"E": -14.580394744873047
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 1.882678508758545,
"scores": {
"A": -6.49999475479126,
"B": -8.382673263549805,
"C": -11.06556510925293,
"D": -10.006368637084961,
"E": -11.96578598022461
}
}
},
{
"ex_id": "aqua-test-109",
"gold": "C",
"baseline": {
"pred_label": "E",
"correct": false,
"margin": -0.8047208786010742,
"scores": {
"A": -10.049991607666016,
"B": -11.320069313049316,
"C": -10.77479362487793,
"D": -12.454825401306152,
"E": -9.970072746276855
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.179561614990234,
"scores": {
"A": -8.94586181640625,
"B": -13.091926574707031,
"C": -15.125423431396484,
"D": -13.694250106811523,
"E": -14.551794052124023
}
}
},
{
"ex_id": "aqua-test-110",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.271200180053711,
"scores": {
"A": -12.053251266479492,
"B": -8.445196151733398,
"C": -11.71639633178711,
"D": -12.579341888427734,
"E": -15.129302024841309
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -5.586641788482666,
"scores": {
"A": -5.685309886932373,
"B": -9.068038940429688,
"C": -11.271951675415039,
"D": -11.380401611328125,
"E": -13.35078239440918
}
}
},
{
"ex_id": "aqua-test-111",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.18841552734375,
"scores": {
"A": -9.808207511901855,
"B": -9.283623695373535,
"C": -9.472039222717285,
"D": -10.7572660446167,
"E": -11.43770980834961
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.6287879943847656,
"scores": {
"A": -8.439443588256836,
"B": -12.068231582641602,
"C": -12.49129867553711,
"D": -13.331933975219727,
"E": -14.553701400756836
}
}
},
{
"ex_id": "aqua-test-112",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -5.095149040222168,
"scores": {
"A": -16.67582893371582,
"B": -16.2126522064209,
"C": -11.580679893493652,
"D": -13.807619094848633,
"E": -15.536310195922852
}
},
"ablated": {
"pred_label": "D",
"correct": false,
"margin": -0.09568214416503906,
"scores": {
"A": -10.995382308959961,
"B": -13.306709289550781,
"C": -11.021820068359375,
"D": -10.899700164794922,
"E": -15.755931854248047
}
}
},
{
"ex_id": "aqua-test-113",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.3748960494995117,
"scores": {
"A": -10.411832809448242,
"B": -9.715240478515625,
"C": -10.090136528015137,
"D": -12.844676971435547,
"E": -11.264602661132812
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -7.242756366729736,
"scores": {
"A": -5.61425256729126,
"B": -10.998117446899414,
"C": -12.857008934020996,
"D": -11.544221878051758,
"E": -13.189793586730957
}
}
},
{
"ex_id": "aqua-test-114",
"gold": "B",
"baseline": {
"pred_label": "E",
"correct": false,
"margin": -0.20656394958496094,
"scores": {
"A": -12.522754669189453,
"B": -11.953495025634766,
"C": -12.375024795532227,
"D": -12.647726058959961,
"E": -11.746931076049805
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.894749641418457,
"scores": {
"A": -5.269627571105957,
"B": -10.164377212524414,
"C": -12.320079803466797,
"D": -10.714139938354492,
"E": -12.523801803588867
}
}
},
{
"ex_id": "aqua-test-115",
"gold": "E",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -1.2912511825561523,
"scores": {
"A": -10.948766708374023,
"B": -11.498233795166016,
"C": -11.3041410446167,
"D": -12.34277629852295,
"E": -12.240017890930176
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.3497333526611328,
"scores": {
"A": -9.884903907775879,
"B": -12.706127166748047,
"C": -13.208802223205566,
"D": -11.332338333129883,
"E": -11.234637260437012
}
}
},
{
"ex_id": "aqua-test-116",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.7076244354248047,
"scores": {
"A": -12.037410736083984,
"B": -9.75387191772461,
"C": -11.461496353149414,
"D": -11.536352157592773,
"E": -11.817276000976562
}
},
"ablated": {
"pred_label": "D",
"correct": false,
"margin": -0.5289134979248047,
"scores": {
"A": -7.282122611999512,
"B": -7.493680000305176,
"C": -8.805983543395996,
"D": -6.964766502380371,
"E": -7.28157901763916
}
}
},
{
"ex_id": "aqua-test-117",
"gold": "E",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.203188896179199,
"scores": {
"A": -11.712003707885742,
"B": -9.130005836486816,
"C": -9.083451271057129,
"D": -11.028267860412598,
"E": -11.286640167236328
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.0263042449951172,
"scores": {
"A": -6.421114444732666,
"B": -6.90539026260376,
"C": -7.702053546905518,
"D": -8.437675476074219,
"E": -7.447418689727783
}
}
},
{
"ex_id": "aqua-test-118",
"gold": "C",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -0.34549522399902344,
"scores": {
"A": -12.712060928344727,
"B": -14.21017074584961,
"C": -13.05755615234375,
"D": -14.676868438720703,
"E": -13.82982063293457
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.252425193786621,
"scores": {
"A": -7.9384307861328125,
"B": -10.416237831115723,
"C": -10.190855979919434,
"D": -11.566178321838379,
"E": -11.377034187316895
}
}
},
{
"ex_id": "aqua-test-119",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.894613265991211,
"scores": {
"A": -12.752466201782227,
"B": -11.295127868652344,
"C": -13.406665802001953,
"D": -13.189741134643555,
"E": -12.74017333984375
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.093064308166504,
"scores": {
"A": -6.806607246398926,
"B": -7.607048988342285,
"C": -10.043014526367188,
"D": -10.89967155456543,
"E": -11.805773735046387
}
}
},
{
"ex_id": "aqua-test-120",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.3595123291015625,
"scores": {
"A": -12.646347045898438,
"B": -10.183612823486328,
"C": -10.54312515258789,
"D": -11.979488372802734,
"E": -12.640970230102539
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.8485918045043945,
"scores": {
"A": -7.463525772094727,
"B": -12.312117576599121,
"C": -11.753535270690918,
"D": -12.008286476135254,
"E": -13.767097473144531
}
}
},
{
"ex_id": "aqua-test-121",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.6828231811523438,
"scores": {
"A": -12.19267463684082,
"B": -10.822580337524414,
"C": -11.00235366821289,
"D": -13.505403518676758,
"E": -13.309852600097656
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.824639797210693,
"scores": {
"A": -7.172897815704346,
"B": -9.329679489135742,
"C": -10.56558609008789,
"D": -11.997537612915039,
"E": -11.65449333190918
}
}
},
{
"ex_id": "aqua-test-122",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.6598358154296875,
"scores": {
"A": -11.076019287109375,
"B": -10.416183471679688,
"C": -13.238750457763672,
"D": -13.289159774780273,
"E": -13.489381790161133
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.3248538970947266,
"scores": {
"A": -6.911991119384766,
"B": -9.236845016479492,
"C": -12.405698776245117,
"D": -10.99496078491211,
"E": -12.164006233215332
}
}
},
{
"ex_id": "aqua-test-123",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 1.928288459777832,
"scores": {
"A": -12.817946434020996,
"B": -13.251622200012207,
"C": -10.08199405670166,
"D": -12.010282516479492,
"E": -12.828923225402832
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.036839485168457,
"scores": {
"A": -7.727773666381836,
"B": -10.925074577331543,
"C": -11.764613151550293,
"D": -11.528144836425781,
"E": -13.928091049194336
}
}
},
{
"ex_id": "aqua-test-124",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.9609298706054688,
"scores": {
"A": -10.947786331176758,
"B": -9.929666519165039,
"C": -10.890596389770508,
"D": -11.183786392211914,
"E": -11.429544448852539
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.408071517944336,
"scores": {
"A": -9.135915756225586,
"B": -11.747968673706055,
"C": -11.543987274169922,
"D": -11.629928588867188,
"E": -11.322809219360352
}
}
},
{
"ex_id": "aqua-test-125",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.26287078857421875,
"scores": {
"A": -12.876455307006836,
"B": -12.006429672241211,
"C": -10.34354305267334,
"D": -10.606413841247559,
"E": -11.505398750305176
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.1885986328125,
"scores": {
"A": -6.909121513366699,
"B": -8.705928802490234,
"C": -10.0977201461792,
"D": -9.862305641174316,
"E": -10.177146911621094
}
}
},
{
"ex_id": "aqua-test-126",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.5388050079345703,
"scores": {
"A": -11.017732620239258,
"B": -8.353882789611816,
"C": -8.892687797546387,
"D": -9.955700874328613,
"E": -10.49584674835205
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -5.850264549255371,
"scores": {
"A": -5.791948318481445,
"B": -10.630058288574219,
"C": -11.642212867736816,
"D": -11.66257381439209,
"E": -12.981222152709961
}
}
},
{
"ex_id": "aqua-test-127",
"gold": "A",
"baseline": {
"pred_label": "D",
"correct": false,
"margin": -0.07229804992675781,
"scores": {
"A": -10.561637878417969,
"B": -10.769161224365234,
"C": -10.85409164428711,
"D": -10.489339828491211,
"E": -11.068832397460938
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 1.2087717056274414,
"scores": {
"A": -6.518125534057617,
"B": -8.725174903869629,
"C": -9.385905265808105,
"D": -7.726897239685059,
"E": -8.385092735290527
}
}
},
{
"ex_id": "aqua-test-128",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.00350284576416,
"scores": {
"A": -10.787993431091309,
"B": -10.3438081741333,
"C": -10.841973304748535,
"D": -13.455949783325195,
"E": -12.347311019897461
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.958767890930176,
"scores": {
"A": -5.449042320251465,
"B": -7.945782661437988,
"C": -9.260028839111328,
"D": -11.206633567810059,
"E": -10.40781021118164
}
}
},
{
"ex_id": "aqua-test-129",
"gold": "D",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -2.607754707336426,
"scores": {
"A": -8.333622932434082,
"B": -10.68683910369873,
"C": -9.665505409240723,
"D": -10.941377639770508,
"E": -9.643619537353516
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.0246152877807617,
"scores": {
"A": -6.0440778732299805,
"B": -8.230778694152832,
"C": -7.669195175170898,
"D": -8.068693161010742,
"E": -8.261618614196777
}
}
},
{
"ex_id": "aqua-test-130",
"gold": "D",
"baseline": {
"pred_label": "D",
"correct": true,
"margin": 0.5736770629882812,
"scores": {
"A": -12.52768611907959,
"B": -11.624752044677734,
"C": -14.400633811950684,
"D": -11.051074981689453,
"E": -12.196588516235352
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.6399145126342773,
"scores": {
"A": -7.714714050292969,
"B": -7.738489151000977,
"C": -10.441914558410645,
"D": -8.354628562927246,
"E": -8.231303215026855
}
}
},
{
"ex_id": "aqua-test-131",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 3.308675765991211,
"scores": {
"A": -14.739927291870117,
"B": -9.621098518371582,
"C": -12.929774284362793,
"D": -13.892219543457031,
"E": -14.483654022216797
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.7574863433837891,
"scores": {
"A": -9.305915832519531,
"B": -8.548429489135742,
"C": -11.97828483581543,
"D": -11.67667007446289,
"E": -12.261627197265625
}
}
},
{
"ex_id": "aqua-test-132",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.159423828125,
"scores": {
"A": -12.748001098632812,
"B": -9.872476577758789,
"C": -11.450910568237305,
"D": -11.085186004638672,
"E": -13.031900405883789
}
},
"ablated": {
"pred_label": "B",
"correct": false,
"margin": -1.519545555114746,
"scores": {
"A": -9.1354398727417,
"B": -9.123946189880371,
"C": -10.554903030395508,
"D": -10.345840454101562,
"E": -10.643491744995117
}
}
},
{
"ex_id": "aqua-test-133",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -4.092678070068359,
"scores": {
"A": -14.868802070617676,
"B": -16.15050506591797,
"C": -10.776124000549316,
"D": -14.600775718688965,
"E": -13.98430061340332
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 3.853121757507324,
"scores": {
"A": -8.56944465637207,
"B": -13.000343322753906,
"C": -12.422566413879395,
"D": -14.54437255859375,
"E": -14.450194358825684
}
}
},
{
"ex_id": "aqua-test-134",
"gold": "D",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -2.462491989135742,
"scores": {
"A": -10.72162914276123,
"B": -11.912126541137695,
"C": -13.152649879455566,
"D": -13.184121131896973,
"E": -13.222978591918945
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -5.007552146911621,
"scores": {
"A": -7.717267990112305,
"B": -10.804107666015625,
"C": -11.367389678955078,
"D": -12.724820137023926,
"E": -14.619938850402832
}
}
},
{
"ex_id": "aqua-test-135",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.5100650787353516,
"scores": {
"A": -13.62440299987793,
"B": -10.135202407836914,
"C": -10.906095504760742,
"D": -13.645267486572266,
"E": -13.970268249511719
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.0002031326293945,
"scores": {
"A": -7.204651832580566,
"B": -8.548638343811035,
"C": -9.101480484008789,
"D": -9.204854965209961,
"E": -10.522067070007324
}
}
},
{
"ex_id": "aqua-test-136",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.6944398880004883,
"scores": {
"A": -12.556368827819824,
"B": -11.861928939819336,
"C": -11.924398422241211,
"D": -14.289663314819336,
"E": -14.393033981323242
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 1.863565444946289,
"scores": {
"A": -10.153200149536133,
"B": -12.455869674682617,
"C": -12.468259811401367,
"D": -12.016765594482422,
"E": -13.061859130859375
}
}
},
{
"ex_id": "aqua-test-137",
"gold": "B",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -0.4276561737060547,
"scores": {
"A": -10.441576957702637,
"B": -10.515706062316895,
"C": -10.08804988861084,
"D": -12.436307907104492,
"E": -12.847251892089844
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.2915706634521484,
"scores": {
"A": -9.814286231994629,
"B": -13.105856895446777,
"C": -11.835171699523926,
"D": -12.84805965423584,
"E": -13.396150588989258
}
}
},
{
"ex_id": "aqua-test-138",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.1457071304321289,
"scores": {
"A": -11.518792152404785,
"B": -11.373085021972656,
"C": -13.849699974060059,
"D": -13.589049339294434,
"E": -12.325687408447266
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 2.37286376953125,
"scores": {
"A": -9.340991973876953,
"B": -11.942909240722656,
"C": -13.375253677368164,
"D": -12.332799911499023,
"E": -11.713855743408203
}
}
},
{
"ex_id": "aqua-test-139",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -4.983943939208984,
"scores": {
"A": -11.229677200317383,
"B": -9.654775619506836,
"C": -11.174234390258789,
"D": -12.573564529418945,
"E": -14.63871955871582
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.07908821105957,
"scores": {
"A": -7.586383819580078,
"B": -9.389440536499023,
"C": -10.293685913085938,
"D": -9.784049987792969,
"E": -11.665472030639648
}
}
},
{
"ex_id": "aqua-test-140",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.090902328491211,
"scores": {
"A": -12.17054557800293,
"B": -10.950679779052734,
"C": -12.478940963745117,
"D": -12.041582107543945,
"E": -12.825494766235352
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.0806522369384766,
"scores": {
"A": -8.631109237670898,
"B": -9.711761474609375,
"C": -10.810302734375,
"D": -10.214776992797852,
"E": -11.603350639343262
}
}
},
{
"ex_id": "aqua-test-141",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.3972196578979492,
"scores": {
"A": -15.668845176696777,
"B": -14.022212028503418,
"C": -12.345376968383789,
"D": -12.742596626281738,
"E": -13.434144973754883
}
},
"ablated": {
"pred_label": "E",
"correct": false,
"margin": -1.4827747344970703,
"scores": {
"A": -9.032247543334961,
"B": -10.177014350891113,
"C": -9.580657005310059,
"D": -8.116410255432129,
"E": -8.097882270812988
}
}
},
{
"ex_id": "aqua-test-142",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 0.03481101989746094,
"scores": {
"A": -12.051063537597656,
"B": -12.085874557495117,
"C": -12.250658988952637,
"D": -13.729873657226562,
"E": -13.645383834838867
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 2.3378829956054688,
"scores": {
"A": -10.318166732788086,
"B": -13.508720397949219,
"C": -12.656049728393555,
"D": -13.118291854858398,
"E": -14.187196731567383
}
}
},
{
"ex_id": "aqua-test-143",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.2146177291870117,
"scores": {
"A": -10.845283508300781,
"B": -9.34599781036377,
"C": -10.440536499023438,
"D": -10.560615539550781,
"E": -10.313671112060547
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.8671979904174805,
"scores": {
"A": -8.795687675476074,
"B": -11.772283554077148,
"C": -12.032180786132812,
"D": -10.662885665893555,
"E": -11.071569442749023
}
}
},
{
"ex_id": "aqua-test-144",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.5195407867431641,
"scores": {
"A": -14.143760681152344,
"B": -13.62421989440918,
"C": -16.54352569580078,
"D": -16.72017478942871,
"E": -16.012075424194336
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 7.688299179077148,
"scores": {
"A": -6.442632675170898,
"B": -14.130931854248047,
"C": -17.20372772216797,
"D": -14.199527740478516,
"E": -15.404621124267578
}
}
},
{
"ex_id": "aqua-test-145",
"gold": "B",
"baseline": {
"pred_label": "E",
"correct": false,
"margin": -0.12565994262695312,
"scores": {
"A": -13.606302261352539,
"B": -11.843841552734375,
"C": -14.27863883972168,
"D": -13.242870330810547,
"E": -11.718181610107422
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.291046142578125,
"scores": {
"A": -8.400792121887207,
"B": -11.691838264465332,
"C": -15.319357872009277,
"D": -13.33833122253418,
"E": -12.860288619995117
}
}
},
{
"ex_id": "aqua-test-146",
"gold": "B",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -1.6924715042114258,
"scores": {
"A": -9.780816078186035,
"B": -11.162944793701172,
"C": -9.470473289489746,
"D": -10.71984577178955,
"E": -9.969797134399414
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.404109001159668,
"scores": {
"A": -8.36557388305664,
"B": -10.769682884216309,
"C": -14.039958000183105,
"D": -13.555811882019043,
"E": -13.034090042114258
}
}
},
{
"ex_id": "aqua-test-147",
"gold": "E",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -0.14206600189208984,
"scores": {
"A": -9.889412879943848,
"B": -10.670077323913574,
"C": -11.043986320495605,
"D": -12.524433135986328,
"E": -10.031478881835938
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.204798698425293,
"scores": {
"A": -6.384355545043945,
"B": -8.133864402770996,
"C": -9.067092895507812,
"D": -10.208111763000488,
"E": -8.589154243469238
}
}
},
{
"ex_id": "aqua-test-148",
"gold": "D",
"baseline": {
"pred_label": "D",
"correct": true,
"margin": 0.09283638000488281,
"scores": {
"A": -11.842838287353516,
"B": -8.686580657958984,
"C": -9.391075134277344,
"D": -8.593744277954102,
"E": -10.327585220336914
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.2917442321777344,
"scores": {
"A": -8.228094100952148,
"B": -9.228675842285156,
"C": -9.42142105102539,
"D": -8.519838333129883,
"E": -9.596782684326172
}
}
},
{
"ex_id": "aqua-test-149",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.9827327728271484,
"scores": {
"A": -12.001296997070312,
"B": -10.335747718811035,
"C": -11.285538673400879,
"D": -11.318480491638184,
"E": -11.636820793151855
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.491743564605713,
"scores": {
"A": -7.410029888153076,
"B": -10.250740051269531,
"C": -13.93216323852539,
"D": -13.901773452758789,
"E": -15.188919067382812
}
}
},
{
"ex_id": "aqua-test-150",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.9258947372436523,
"scores": {
"A": -12.320900917053223,
"B": -9.475645065307617,
"C": -10.984822273254395,
"D": -12.40153980255127,
"E": -12.635085105895996
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -5.376846790313721,
"scores": {
"A": -7.0111308097839355,
"B": -12.037229537963867,
"C": -12.431285858154297,
"D": -12.387977600097656,
"E": -13.794790267944336
}
}
},
{
"ex_id": "aqua-test-151",
"gold": "C",
"baseline": {
"pred_label": "E",
"correct": false,
"margin": -2.647052764892578,
"scores": {
"A": -15.152583122253418,
"B": -13.6299467086792,
"C": -14.575118064880371,
"D": -15.285728454589844,
"E": -11.928065299987793
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.1320362091064453,
"scores": {
"A": -10.271482467651367,
"B": -11.43825912475586,
"C": -13.403518676757812,
"D": -11.501873016357422,
"E": -12.473245620727539
}
}
},
{
"ex_id": "aqua-test-152",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.6472129821777344,
"scores": {
"A": -12.141305923461914,
"B": -11.08128833770752,
"C": -11.728501319885254,
"D": -11.744885444641113,
"E": -11.734070777893066
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.606741905212402,
"scores": {
"A": -8.290619850158691,
"B": -12.897361755371094,
"C": -16.176721572875977,
"D": -13.130666732788086,
"E": -13.918773651123047
}
}
},
{
"ex_id": "aqua-test-153",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 0.4722251892089844,
"scores": {
"A": -10.484762191772461,
"B": -10.956987380981445,
"C": -12.194547653198242,
"D": -15.127632141113281,
"E": -13.331162452697754
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 3.3751373291015625,
"scores": {
"A": -5.1184186935424805,
"B": -8.740020751953125,
"C": -8.493556022644043,
"D": -12.901175498962402,
"E": -12.068525314331055
}
}
},
{
"ex_id": "aqua-test-154",
"gold": "D",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -4.98802375793457,
"scores": {
"A": -10.425346374511719,
"B": -11.881086349487305,
"C": -11.785425186157227,
"D": -15.413370132446289,
"E": -13.525296211242676
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.682590961456299,
"scores": {
"A": -6.576329708099365,
"B": -9.645816802978516,
"C": -10.998794555664062,
"D": -11.258920669555664,
"E": -11.089090347290039
}
}
},
{
"ex_id": "aqua-test-155",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -3.2458534240722656,
"scores": {
"A": -11.75346565246582,
"B": -10.164717674255371,
"C": -8.507612228393555,
"D": -12.298287391662598,
"E": -12.295981407165527
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 1.698258399963379,
"scores": {
"A": -6.5401506423950195,
"B": -9.082632064819336,
"C": -8.238409042358398,
"D": -9.46942138671875,
"E": -8.837421417236328
}
}
},
{
"ex_id": "aqua-test-156",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.5458030700683594,
"scores": {
"A": -11.058704376220703,
"B": -9.512901306152344,
"C": -10.548510551452637,
"D": -10.738350868225098,
"E": -10.144469261169434
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 0.5494518280029297,
"scores": {
"A": -8.577381134033203,
"B": -9.532999038696289,
"C": -10.54125690460205,
"D": -9.21225643157959,
"E": -9.126832962036133
}
}
},
{
"ex_id": "aqua-test-157",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.2852640151977539,
"scores": {
"A": -10.463203430175781,
"B": -8.768805503845215,
"C": -9.054069519042969,
"D": -9.892763137817383,
"E": -10.059773445129395
}
},
"ablated": {
"pred_label": "E",
"correct": false,
"margin": -1.2878742218017578,
"scores": {
"A": -7.138072967529297,
"B": -8.380763053894043,
"C": -8.266191482543945,
"D": -7.350512504577637,
"E": -6.9783172607421875
}
}
},
{
"ex_id": "aqua-test-158",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.5956363677978516,
"scores": {
"A": -11.858783721923828,
"B": -9.868914604187012,
"C": -12.355928421020508,
"D": -12.464550971984863,
"E": -12.451033592224121
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.8353786468505859,
"scores": {
"A": -7.951072692871094,
"B": -10.968667030334473,
"C": -9.150461196899414,
"D": -8.78645133972168,
"E": -9.640289306640625
}
}
},
{
"ex_id": "aqua-test-159",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.2088556289672852,
"scores": {
"A": -14.390352249145508,
"B": -10.970995903015137,
"C": -12.179851531982422,
"D": -14.215005874633789,
"E": -13.121991157531738
}
},
"ablated": {
"pred_label": "D",
"correct": false,
"margin": -1.5493526458740234,
"scores": {
"A": -10.103426933288574,
"B": -10.092862129211426,
"C": -11.540170669555664,
"D": -9.99081802368164,
"E": -10.121420860290527
}
}
},
{
"ex_id": "aqua-test-160",
"gold": "D",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -2.848499298095703,
"scores": {
"A": -11.598515510559082,
"B": -14.197969436645508,
"C": -14.457886695861816,
"D": -14.447014808654785,
"E": -14.36185073852539
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.696338653564453,
"scores": {
"A": -7.641029357910156,
"B": -11.26103687286377,
"C": -12.703668594360352,
"D": -12.33736801147461,
"E": -12.883567810058594
}
}
},
{
"ex_id": "aqua-test-161",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.1676807403564453,
"scores": {
"A": -14.895027160644531,
"B": -10.322026252746582,
"C": -12.489706993103027,
"D": -12.704346656799316,
"E": -15.176275253295898
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.219618797302246,
"scores": {
"A": -6.856925964355469,
"B": -7.009190559387207,
"C": -9.076544761657715,
"D": -9.654449462890625,
"E": -9.462542533874512
}
}
},
{
"ex_id": "aqua-test-162",
"gold": "C",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -2.2371959686279297,
"scores": {
"A": -10.148950576782227,
"B": -10.49891471862793,
"C": -12.386146545410156,
"D": -11.85212516784668,
"E": -11.997817993164062
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.578624725341797,
"scores": {
"A": -5.560625076293945,
"B": -10.614078521728516,
"C": -10.139249801635742,
"D": -10.14529800415039,
"E": -11.632416725158691
}
}
},
{
"ex_id": "aqua-test-163",
"gold": "C",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -1.3345470428466797,
"scores": {
"A": -10.16016960144043,
"B": -10.63470458984375,
"C": -11.49471664428711,
"D": -14.061302185058594,
"E": -11.96687126159668
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.617689609527588,
"scores": {
"A": -7.696357250213623,
"B": -11.909870147705078,
"C": -12.314046859741211,
"D": -12.379276275634766,
"E": -10.673864364624023
}
}
},
{
"ex_id": "aqua-test-164",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.0201797485351562,
"scores": {
"A": -11.76971435546875,
"B": -11.384875297546387,
"C": -12.587923049926758,
"D": -14.405055046081543,
"E": -13.626121520996094
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.4918289184570312,
"scores": {
"A": -10.035612106323242,
"B": -13.309289932250977,
"C": -14.361808776855469,
"D": -12.527441024780273,
"E": -13.649295806884766
}
}
},
{
"ex_id": "aqua-test-165",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.3930606842041016,
"scores": {
"A": -13.838768005371094,
"B": -11.385400772094727,
"C": -14.192607879638672,
"D": -12.778461456298828,
"E": -14.867376327514648
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.0367560386657715,
"scores": {
"A": -7.127369403839111,
"B": -8.74629020690918,
"C": -13.14265251159668,
"D": -13.164125442504883,
"E": -14.882063865661621
}
}
},
{
"ex_id": "aqua-test-166",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.7403507232666016,
"scores": {
"A": -13.24523639678955,
"B": -8.922253608703613,
"C": -9.740599632263184,
"D": -10.831602096557617,
"E": -11.662604331970215
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -5.432188510894775,
"scores": {
"A": -6.22821569442749,
"B": -8.795976638793945,
"C": -9.08587646484375,
"D": -9.576181411743164,
"E": -11.660404205322266
}
}
},
{
"ex_id": "aqua-test-167",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 2.286722183227539,
"scores": {
"A": -13.319049835205078,
"B": -10.63465690612793,
"C": -12.921379089355469,
"D": -16.10821533203125,
"E": -14.74123764038086
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.3104257583618164,
"scores": {
"A": -8.05471420288086,
"B": -11.365139961242676,
"C": -15.134896278381348,
"D": -13.336740493774414,
"E": -14.394715309143066
}
}
},
{
"ex_id": "aqua-test-168",
"gold": "D",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -1.5851516723632812,
"scores": {
"A": -11.949223518371582,
"B": -12.088781356811523,
"C": -12.600192070007324,
"D": -13.534375190734863,
"E": -13.724043846130371
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.811002731323242,
"scores": {
"A": -8.285033226013184,
"B": -12.87575912475586,
"C": -14.61474609375,
"D": -13.096035957336426,
"E": -14.469371795654297
}
}
},
{
"ex_id": "aqua-test-169",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.259394645690918,
"scores": {
"A": -11.700346946716309,
"B": -9.44095230102539,
"C": -11.634363174438477,
"D": -13.524284362792969,
"E": -12.442931175231934
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 1.1425189971923828,
"scores": {
"A": -9.171747207641602,
"B": -10.314266204833984,
"C": -10.48922348022461,
"D": -11.292402267456055,
"E": -11.195283889770508
}
}
},
{
"ex_id": "aqua-test-170",
"gold": "E",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -1.667557716369629,
"scores": {
"A": -12.687468528747559,
"B": -13.540651321411133,
"C": -15.73199462890625,
"D": -14.172163009643555,
"E": -14.355026245117188
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.3893675804138184,
"scores": {
"A": -7.45543909072876,
"B": -11.622758865356445,
"C": -12.694786071777344,
"D": -10.333147048950195,
"E": -10.844806671142578
}
}
},
{
"ex_id": "aqua-test-171",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.3269481658935547,
"scores": {
"A": -12.739081382751465,
"B": -11.41213321685791,
"C": -12.104532241821289,
"D": -14.289388656616211,
"E": -13.22745132446289
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 3.89689302444458,
"scores": {
"A": -7.288093090057373,
"B": -11.184986114501953,
"C": -14.11172103881836,
"D": -13.88416862487793,
"E": -14.152048110961914
}
}
},
{
"ex_id": "aqua-test-172",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.552183151245117,
"scores": {
"A": -11.899900436401367,
"B": -9.34771728515625,
"C": -9.6818265914917,
"D": -12.592266082763672,
"E": -11.358457565307617
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 0.2487936019897461,
"scores": {
"A": -7.7580461502075195,
"B": -8.006839752197266,
"C": -8.965506553649902,
"D": -10.227289199829102,
"E": -8.758523941040039
}
}
},
{
"ex_id": "aqua-test-173",
"gold": "D",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -4.587629318237305,
"scores": {
"A": -9.585987091064453,
"B": -13.276374816894531,
"C": -13.367696762084961,
"D": -14.173616409301758,
"E": -12.080738067626953
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -7.505217552185059,
"scores": {
"A": -7.101271629333496,
"B": -12.202089309692383,
"C": -14.173044204711914,
"D": -14.606489181518555,
"E": -14.75442123413086
}
}
},
{
"ex_id": "aqua-test-174",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.24216461181640625,
"scores": {
"A": -14.199091911315918,
"B": -13.47407054901123,
"C": -14.678143501281738,
"D": -13.550527572631836,
"E": -13.716235160827637
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -5.9764862060546875,
"scores": {
"A": -8.514163970947266,
"B": -11.301080703735352,
"C": -14.558061599731445,
"D": -12.563972473144531,
"E": -14.490650177001953
}
}
},
{
"ex_id": "aqua-test-175",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.3755264282226562,
"scores": {
"A": -12.588652610778809,
"B": -11.279211044311523,
"C": -12.724414825439453,
"D": -13.617168426513672,
"E": -12.65473747253418
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.200804233551025,
"scores": {
"A": -7.825118541717529,
"B": -12.34547233581543,
"C": -15.431029319763184,
"D": -11.632364273071289,
"E": -14.025922775268555
}
}
},
{
"ex_id": "aqua-test-176",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.7927188873291016,
"scores": {
"A": -10.986503601074219,
"B": -10.264165878295898,
"C": -13.056884765625,
"D": -13.231691360473633,
"E": -11.577075958251953
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.047300338745117,
"scores": {
"A": -8.569661140441895,
"B": -10.96971607208252,
"C": -12.616961479187012,
"D": -10.226570129394531,
"E": -10.359309196472168
}
}
},
{
"ex_id": "aqua-test-177",
"gold": "C",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -4.117347717285156,
"scores": {
"A": -9.619380950927734,
"B": -9.922872543334961,
"C": -13.73672866821289,
"D": -11.895669937133789,
"E": -11.525716781616211
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.858722686767578,
"scores": {
"A": -5.202523231506348,
"B": -7.391201972961426,
"C": -12.061245918273926,
"D": -9.575565338134766,
"E": -11.126143455505371
}
}
},
{
"ex_id": "aqua-test-178",
"gold": "E",
"baseline": {
"pred_label": "E",
"correct": true,
"margin": 0.44650745391845703,
"scores": {
"A": -13.206219673156738,
"B": -11.094629287719727,
"C": -12.79085922241211,
"D": -12.61279582977295,
"E": -10.64812183380127
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.734641075134277,
"scores": {
"A": -5.792222023010254,
"B": -10.40644359588623,
"C": -8.512224197387695,
"D": -10.881692886352539,
"E": -12.526863098144531
}
}
},
{
"ex_id": "aqua-test-179",
"gold": "B",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -1.587289810180664,
"scores": {
"A": -9.154937744140625,
"B": -10.742227554321289,
"C": -11.76572322845459,
"D": -12.661623001098633,
"E": -12.793743133544922
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.158479690551758,
"scores": {
"A": -5.91640567779541,
"B": -8.074885368347168,
"C": -10.336216926574707,
"D": -10.604473114013672,
"E": -12.273855209350586
}
}
},
{
"ex_id": "aqua-test-180",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.503520965576172,
"scores": {
"A": -11.160909652709961,
"B": -9.851707458496094,
"C": -12.355228424072266,
"D": -13.170286178588867,
"E": -12.383331298828125
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.526637077331543,
"scores": {
"A": -10.631556510925293,
"B": -11.933387756347656,
"C": -12.158193588256836,
"D": -11.02450942993164,
"E": -11.620341300964355
}
}
},
{
"ex_id": "aqua-test-181",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.7682161331176758,
"scores": {
"A": -9.629287719726562,
"B": -8.861071586608887,
"C": -11.832342147827148,
"D": -11.63463020324707,
"E": -10.680866241455078
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.5154037475585938,
"scores": {
"A": -7.565939903259277,
"B": -9.081343650817871,
"C": -10.455299377441406,
"D": -9.157304763793945,
"E": -9.032361030578613
}
}
},
{
"ex_id": "aqua-test-182",
"gold": "C",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -3.821702003479004,
"scores": {
"A": -11.69933032989502,
"B": -15.636759757995605,
"C": -15.521032333374023,
"D": -15.261280059814453,
"E": -15.790119171142578
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.4775166511535645,
"scores": {
"A": -7.3024001121521,
"B": -11.616369247436523,
"C": -13.779916763305664,
"D": -14.841501235961914,
"E": -15.299184799194336
}
}
},
{
"ex_id": "aqua-test-183",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.4096593856811523,
"scores": {
"A": -12.662055969238281,
"B": -9.372528076171875,
"C": -10.782187461853027,
"D": -13.160992622375488,
"E": -13.141705513000488
}
},
"ablated": {
"pred_label": "C",
"correct": false,
"margin": -0.7217111587524414,
"scores": {
"A": -8.667959213256836,
"B": -7.500253200531006,
"C": -6.7785420417785645,
"D": -9.29892349243164,
"E": -10.76202392578125
}
}
},
{
"ex_id": "aqua-test-184",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.1710405349731445,
"scores": {
"A": -11.335744857788086,
"B": -9.919331550598145,
"C": -11.165321350097656,
"D": -13.090372085571289,
"E": -12.288164138793945
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.067633628845215,
"scores": {
"A": -8.7251615524292,
"B": -11.410130500793457,
"C": -11.521978378295898,
"D": -10.792795181274414,
"E": -11.264982223510742
}
}
},
{
"ex_id": "aqua-test-185",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.1894311904907227,
"scores": {
"A": -11.377983093261719,
"B": -9.404431343078613,
"C": -12.593862533569336,
"D": -12.444841384887695,
"E": -12.661911964416504
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -8.37000560760498,
"scores": {
"A": -5.967945098876953,
"B": -12.820409774780273,
"C": -14.337950706481934,
"D": -13.287762641906738,
"E": -14.76830005645752
}
}
},
{
"ex_id": "aqua-test-186",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 0.3980731964111328,
"scores": {
"A": -11.276969909667969,
"B": -11.675043106079102,
"C": -14.097780227661133,
"D": -14.689929962158203,
"E": -13.63922119140625
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 2.4271883964538574,
"scores": {
"A": -7.584385395050049,
"B": -10.011573791503906,
"C": -13.923393249511719,
"D": -12.747108459472656,
"E": -12.790593147277832
}
}
},
{
"ex_id": "aqua-test-187",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.0718564987182617,
"scores": {
"A": -12.528532028198242,
"B": -10.982802391052246,
"C": -13.054658889770508,
"D": -14.590221405029297,
"E": -14.210501670837402
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.2736144065856934,
"scores": {
"A": -7.474527835845947,
"B": -9.265643119812012,
"C": -10.74814224243164,
"D": -11.135126113891602,
"E": -11.180526733398438
}
}
},
{
"ex_id": "aqua-test-188",
"gold": "E",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -3.121814727783203,
"scores": {
"A": -12.144158363342285,
"B": -13.037331581115723,
"C": -13.73487377166748,
"D": -14.09709358215332,
"E": -15.265973091125488
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -5.235786437988281,
"scores": {
"A": -11.129180908203125,
"B": -13.703113555908203,
"C": -16.397157669067383,
"D": -12.766101837158203,
"E": -16.364967346191406
}
}
},
{
"ex_id": "aqua-test-189",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.20896244049072266,
"scores": {
"A": -14.54034423828125,
"B": -11.554760932922363,
"C": -11.811978340148926,
"D": -11.763723373413086,
"E": -13.348597526550293
}
},
"ablated": {
"pred_label": "D",
"correct": false,
"margin": -0.8642768859863281,
"scores": {
"A": -9.882810592651367,
"B": -10.419057846069336,
"C": -10.307378768920898,
"D": -9.554780960083008,
"E": -9.593378067016602
}
}
},
{
"ex_id": "aqua-test-190",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.35360145568847656,
"scores": {
"A": -13.596860885620117,
"B": -10.771349906921387,
"C": -10.41774845123291,
"D": -13.349145889282227,
"E": -13.912391662597656
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.489037036895752,
"scores": {
"A": -6.3273138999938965,
"B": -9.247300148010254,
"C": -12.816350936889648,
"D": -10.787364959716797,
"E": -12.917289733886719
}
}
},
{
"ex_id": "aqua-test-191",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.9574899673461914,
"scores": {
"A": -11.895600318908691,
"B": -10.9381103515625,
"C": -13.633337020874023,
"D": -14.099964141845703,
"E": -13.749225616455078
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.093125343322754,
"scores": {
"A": -6.434209823608398,
"B": -8.527335166931152,
"C": -11.775838851928711,
"D": -11.290367126464844,
"E": -12.324054718017578
}
}
},
{
"ex_id": "aqua-test-192",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -0.11513137817382812,
"scores": {
"A": -13.689857482910156,
"B": -13.662582397460938,
"C": -13.574726104736328,
"D": -14.454401016235352,
"E": -13.602828979492188
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 1.6858510971069336,
"scores": {
"A": -8.88024616241455,
"B": -12.6011962890625,
"C": -13.179601669311523,
"D": -11.137103080749512,
"E": -10.566097259521484
}
}
},
{
"ex_id": "aqua-test-193",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.7755870819091797,
"scores": {
"A": -11.522911071777344,
"B": -8.747323989868164,
"C": -9.848695755004883,
"D": -10.299760818481445,
"E": -10.045204162597656
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 1.2680301666259766,
"scores": {
"A": -8.59853458404541,
"B": -10.327086448669434,
"C": -10.31645679473877,
"D": -9.866564750671387,
"E": -9.983407974243164
}
}
},
{
"ex_id": "aqua-test-194",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.1696929931640625,
"scores": {
"A": -9.464816093444824,
"B": -9.411468505859375,
"C": -10.639501571655273,
"D": -11.581161499023438,
"E": -12.365375518798828
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.69554328918457,
"scores": {
"A": -7.166990280151367,
"B": -12.7379150390625,
"C": -14.002235412597656,
"D": -13.862533569335938,
"E": -15.497852325439453
}
}
},
{
"ex_id": "aqua-test-195",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -4.067173004150391,
"scores": {
"A": -11.221086502075195,
"B": -8.251949310302734,
"C": -10.201787948608398,
"D": -12.319122314453125,
"E": -12.611976623535156
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.214510917663574,
"scores": {
"A": -8.04576301574707,
"B": -9.183307647705078,
"C": -9.281808853149414,
"D": -10.260273933410645,
"E": -10.553353309631348
}
}
},
{
"ex_id": "aqua-test-196",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.8260202407836914,
"scores": {
"A": -12.807720184326172,
"B": -10.98169994354248,
"C": -11.949183464050293,
"D": -12.729838371276855,
"E": -14.811100006103516
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 1.4137496948242188,
"scores": {
"A": -9.012284278869629,
"B": -11.167215347290039,
"C": -11.748849868774414,
"D": -10.426033973693848,
"E": -12.733590126037598
}
}
},
{
"ex_id": "aqua-test-197",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.6986474990844727,
"scores": {
"A": -10.992959022521973,
"B": -8.939371109008789,
"C": -9.638018608093262,
"D": -9.939753532409668,
"E": -10.469696998596191
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.4545230865478516,
"scores": {
"A": -8.536977767944336,
"B": -10.056846618652344,
"C": -11.991500854492188,
"D": -8.791147232055664,
"E": -9.266132354736328
}
}
},
{
"ex_id": "aqua-test-198",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.42527008056640625,
"scores": {
"A": -11.604852676391602,
"B": -9.509223937988281,
"C": -9.934494018554688,
"D": -13.406452178955078,
"E": -11.831525802612305
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.08353328704833984,
"scores": {
"A": -9.30318546295166,
"B": -11.121411323547363,
"C": -9.38671875,
"D": -11.00162410736084,
"E": -11.414689064025879
}
}
},
{
"ex_id": "aqua-test-199",
"gold": "C",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -5.215788841247559,
"scores": {
"A": -10.268373489379883,
"B": -14.280426025390625,
"C": -15.484162330627441,
"D": -16.044178009033203,
"E": -13.105344772338867
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -5.14111328125,
"scores": {
"A": -9.658950805664062,
"B": -12.755328178405762,
"C": -14.800064086914062,
"D": -14.560892105102539,
"E": -15.274332046508789
}
}
},
{
"ex_id": "aqua-test-200",
"gold": "B",
"baseline": {
"pred_label": "E",
"correct": false,
"margin": -0.2883167266845703,
"scores": {
"A": -12.25977897644043,
"B": -11.99930477142334,
"C": -13.633522033691406,
"D": -12.022893905639648,
"E": -11.71098804473877
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.265233039855957,
"scores": {
"A": -7.928126335144043,
"B": -11.193359375,
"C": -13.55146598815918,
"D": -9.998331069946289,
"E": -9.261889457702637
}
}
},
{
"ex_id": "aqua-test-201",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.5516147613525391,
"scores": {
"A": -10.827482223510742,
"B": -10.275867462158203,
"C": -10.488014221191406,
"D": -11.649810791015625,
"E": -12.461782455444336
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 1.869196891784668,
"scores": {
"A": -9.49152660369873,
"B": -11.884713172912598,
"C": -13.91677188873291,
"D": -11.360723495483398,
"E": -14.611146926879883
}
}
},
{
"ex_id": "aqua-test-202",
"gold": "B",
"baseline": {
"pred_label": "E",
"correct": false,
"margin": -1.6743907928466797,
"scores": {
"A": -11.541190147399902,
"B": -11.870600700378418,
"C": -10.76689338684082,
"D": -11.914441108703613,
"E": -10.196209907531738
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.4919681549072266,
"scores": {
"A": -6.873357772827148,
"B": -10.365325927734375,
"C": -13.835872650146484,
"D": -13.012420654296875,
"E": -13.628089904785156
}
}
},
{
"ex_id": "aqua-test-203",
"gold": "D",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.915132522583008,
"scores": {
"A": -9.924501419067383,
"B": -9.837858200073242,
"C": -8.515426635742188,
"D": -11.430559158325195,
"E": -10.028295516967773
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.9277362823486328,
"scores": {
"A": -8.02107048034668,
"B": -9.770700454711914,
"C": -10.0626220703125,
"D": -8.948806762695312,
"E": -10.232612609863281
}
}
},
{
"ex_id": "aqua-test-204",
"gold": "C",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -0.054775238037109375,
"scores": {
"A": -9.659589767456055,
"B": -9.96756362915039,
"C": -9.714365005493164,
"D": -11.157163619995117,
"E": -10.775384902954102
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.617983341217041,
"scores": {
"A": -6.012364864349365,
"B": -10.016752243041992,
"C": -10.630348205566406,
"D": -11.478163719177246,
"E": -11.222594261169434
}
}
},
{
"ex_id": "aqua-test-205",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.753251075744629,
"scores": {
"A": -9.721230506896973,
"B": -8.277044296264648,
"C": -10.387093544006348,
"D": -11.78427791595459,
"E": -11.030295372009277
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.736443042755127,
"scores": {
"A": -7.844253063201904,
"B": -9.998329162597656,
"C": -11.479426383972168,
"D": -11.290699005126953,
"E": -12.580696105957031
}
}
},
{
"ex_id": "aqua-test-206",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.18168067932128906,
"scores": {
"A": -11.602930068969727,
"B": -10.961795806884766,
"C": -11.143476486206055,
"D": -12.837438583374023,
"E": -14.00632095336914
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.0302047729492188,
"scores": {
"A": -8.013933181762695,
"B": -11.044137954711914,
"C": -12.337331771850586,
"D": -11.77204704284668,
"E": -14.158761024475098
}
}
},
{
"ex_id": "aqua-test-207",
"gold": "E",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -3.332465171813965,
"scores": {
"A": -12.935659408569336,
"B": -13.335750579833984,
"C": -10.853610038757324,
"D": -15.803115844726562,
"E": -14.186075210571289
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -8.302919387817383,
"scores": {
"A": -6.643090724945068,
"B": -11.031190872192383,
"C": -12.203893661499023,
"D": -15.097414016723633,
"E": -14.94601058959961
}
}
},
{
"ex_id": "aqua-test-208",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.8153915405273438,
"scores": {
"A": -12.07571029663086,
"B": -10.124650955200195,
"C": -11.729717254638672,
"D": -10.940042495727539,
"E": -13.055669784545898
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.7086524963378906,
"scores": {
"A": -9.813956260681152,
"B": -11.153253555297852,
"C": -13.102922439575195,
"D": -11.522608757019043,
"E": -12.09807300567627
}
}
},
{
"ex_id": "aqua-test-209",
"gold": "E",
"baseline": {
"pred_label": "E",
"correct": true,
"margin": 0.9387226104736328,
"scores": {
"A": -10.875650405883789,
"B": -9.770784378051758,
"C": -13.167339324951172,
"D": -10.202999114990234,
"E": -8.832061767578125
}
},
"ablated": {
"pred_label": "E",
"correct": true,
"margin": 0.02905750274658203,
"scores": {
"A": -6.857519149780273,
"B": -8.824195861816406,
"C": -11.01872730255127,
"D": -7.9446258544921875,
"E": -6.828461647033691
}
}
},
{
"ex_id": "aqua-test-210",
"gold": "E",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -0.5599374771118164,
"scores": {
"A": -10.820873260498047,
"B": -10.8345947265625,
"C": -10.908698081970215,
"D": -12.616942405700684,
"E": -11.380810737609863
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.7156219482421875,
"scores": {
"A": -8.604469299316406,
"B": -10.140972137451172,
"C": -9.98969841003418,
"D": -9.52783203125,
"E": -9.320091247558594
}
}
},
{
"ex_id": "aqua-test-211",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.5010337829589844,
"scores": {
"A": -9.232921600341797,
"B": -8.809791564941406,
"C": -10.77252197265625,
"D": -11.31082534790039,
"E": -9.859048843383789
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.274179458618164,
"scores": {
"A": -6.612133979797363,
"B": -7.950355529785156,
"C": -10.069632530212402,
"D": -8.886313438415527,
"E": -8.989130020141602
}
}
},
{
"ex_id": "aqua-test-212",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.515838623046875,
"scores": {
"A": -11.57960319519043,
"B": -9.604219436645508,
"C": -11.120058059692383,
"D": -11.739898681640625,
"E": -12.83167839050293
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.382899284362793,
"scores": {
"A": -4.981387138366699,
"B": -8.364286422729492,
"C": -11.265626907348633,
"D": -9.413225173950195,
"E": -11.893355369567871
}
}
},
{
"ex_id": "aqua-test-213",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.7208938598632812,
"scores": {
"A": -13.409758567810059,
"B": -12.505935668945312,
"C": -15.226829528808594,
"D": -15.019231796264648,
"E": -13.97078800201416
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.587521553039551,
"scores": {
"A": -7.238009452819824,
"B": -11.375316619873047,
"C": -13.825531005859375,
"D": -12.193073272705078,
"E": -12.065244674682617
}
}
},
{
"ex_id": "aqua-test-214",
"gold": "E",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -3.253298759460449,
"scores": {
"A": -10.732587814331055,
"B": -9.240824699401855,
"C": -8.385510444641113,
"D": -11.663347244262695,
"E": -11.638809204101562
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.647817611694336,
"scores": {
"A": -7.310015678405762,
"B": -10.763197898864746,
"C": -10.126215934753418,
"D": -9.662254333496094,
"E": -10.957833290100098
}
}
},
{
"ex_id": "aqua-test-215",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -6.795169830322266,
"scores": {
"A": -22.99382781982422,
"B": -18.132843017578125,
"C": -16.198657989501953,
"D": -22.130014419555664,
"E": -22.1824893951416
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 0.6526165008544922,
"scores": {
"A": -8.266807556152344,
"B": -9.545059204101562,
"C": -8.919424057006836,
"D": -11.000604629516602,
"E": -11.521963119506836
}
}
},
{
"ex_id": "aqua-test-216",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.6342000961303711,
"scores": {
"A": -12.792722702026367,
"B": -9.749557495117188,
"C": -10.383757591247559,
"D": -12.744852066040039,
"E": -12.79257583618164
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.935434341430664,
"scores": {
"A": -7.502462387084961,
"B": -9.681203842163086,
"C": -10.437896728515625,
"D": -10.083349227905273,
"E": -10.514593124389648
}
}
},
{
"ex_id": "aqua-test-217",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.419353485107422,
"scores": {
"A": -12.823972702026367,
"B": -8.596864700317383,
"C": -12.047069549560547,
"D": -15.540777206420898,
"E": -12.016218185424805
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.360109329223633,
"scores": {
"A": -6.694489479064941,
"B": -7.449652671813965,
"C": -9.901532173156738,
"D": -10.334230422973633,
"E": -9.054598808288574
}
}
},
{
"ex_id": "aqua-test-218",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 0.45700550079345703,
"scores": {
"A": -9.913926124572754,
"B": -10.370931625366211,
"C": -11.4942045211792,
"D": -12.226011276245117,
"E": -11.360005378723145
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 2.9241886138916016,
"scores": {
"A": -6.717060089111328,
"B": -9.64124870300293,
"C": -12.240556716918945,
"D": -12.319646835327148,
"E": -12.892836570739746
}
}
},
{
"ex_id": "aqua-test-219",
"gold": "C",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -0.8197288513183594,
"scores": {
"A": -10.761848449707031,
"B": -10.766995429992676,
"C": -11.58157730102539,
"D": -11.309408187866211,
"E": -10.906158447265625
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -5.418631553649902,
"scores": {
"A": -6.581605911254883,
"B": -10.751213073730469,
"C": -12.000237464904785,
"D": -12.113553047180176,
"E": -13.230367660522461
}
}
},
{
"ex_id": "aqua-test-220",
"gold": "E",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -1.4230737686157227,
"scores": {
"A": -9.55500316619873,
"B": -10.829366683959961,
"C": -12.407928466796875,
"D": -12.210527420043945,
"E": -10.978076934814453
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -8.069978713989258,
"scores": {
"A": -6.391247272491455,
"B": -12.834989547729492,
"C": -14.329586029052734,
"D": -11.875961303710938,
"E": -14.461225509643555
}
}
},
{
"ex_id": "aqua-test-221",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.1843414306640625,
"scores": {
"A": -13.276018142700195,
"B": -8.882017135620117,
"C": -8.697675704956055,
"D": -11.192451477050781,
"E": -12.547571182250977
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.23614788055419922,
"scores": {
"A": -7.982804298400879,
"B": -8.148253440856934,
"C": -7.680639266967773,
"D": -9.554062843322754,
"E": -7.916787147521973
}
}
},
{
"ex_id": "aqua-test-222",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.8982372283935547,
"scores": {
"A": -10.80961799621582,
"B": -9.526689529418945,
"C": -12.049747467041016,
"D": -13.4249267578125,
"E": -14.212126731872559
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.658107757568359,
"scores": {
"A": -6.3620195388793945,
"B": -8.394012451171875,
"C": -11.750795364379883,
"D": -11.020127296447754,
"E": -11.600711822509766
}
}
},
{
"ex_id": "aqua-test-223",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 3.090752601623535,
"scores": {
"A": -10.285022735595703,
"B": -7.155424118041992,
"C": -10.246176719665527,
"D": -10.93359375,
"E": -11.335384368896484
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.092008590698242,
"scores": {
"A": -4.6826276779174805,
"B": -8.774636268615723,
"C": -12.371101379394531,
"D": -11.170863151550293,
"E": -13.101846694946289
}
}
},
{
"ex_id": "aqua-test-224",
"gold": "D",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -3.33624267578125,
"scores": {
"A": -9.653928756713867,
"B": -10.031352996826172,
"C": -10.045028686523438,
"D": -12.990171432495117,
"E": -13.378705978393555
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -7.7593231201171875,
"scores": {
"A": -6.229616165161133,
"B": -9.94646167755127,
"C": -11.709576606750488,
"D": -13.98893928527832,
"E": -14.078731536865234
}
}
},
{
"ex_id": "aqua-test-225",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.9616122245788574,
"scores": {
"A": -9.889963150024414,
"B": -7.453649997711182,
"C": -8.71200942993164,
"D": -10.415262222290039,
"E": -10.100379943847656
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.348393440246582,
"scores": {
"A": -7.418603897094727,
"B": -8.728095054626465,
"C": -9.700243949890137,
"D": -8.766997337341309,
"E": -10.309947967529297
}
}
},
{
"ex_id": "aqua-test-226",
"gold": "E",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -0.807805061340332,
"scores": {
"A": -9.130120277404785,
"B": -8.975384712219238,
"C": -8.7509126663208,
"D": -10.859039306640625,
"E": -9.558717727661133
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.761455535888672,
"scores": {
"A": -5.507261276245117,
"B": -8.399494171142578,
"C": -9.425889015197754,
"D": -8.896235466003418,
"E": -8.268716812133789
}
}
},
{
"ex_id": "aqua-test-227",
"gold": "B",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.220867156982422,
"scores": {
"A": -14.620243072509766,
"B": -13.570455551147461,
"C": -11.349588394165039,
"D": -11.514875411987305,
"E": -11.799421310424805
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.755785942077637,
"scores": {
"A": -8.976465225219727,
"B": -13.732251167297363,
"C": -14.044622421264648,
"D": -10.583160400390625,
"E": -14.6570405960083
}
}
},
{
"ex_id": "aqua-test-228",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.06908798217773438,
"scores": {
"A": -13.294260025024414,
"B": -10.70706558227539,
"C": -10.776153564453125,
"D": -14.082728385925293,
"E": -14.882830619812012
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.2796850204467773,
"scores": {
"A": -7.156650543212891,
"B": -8.436335563659668,
"C": -9.495584487915039,
"D": -10.117116928100586,
"E": -8.917889595031738
}
}
},
{
"ex_id": "aqua-test-229",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -4.243276596069336,
"scores": {
"A": -13.26862907409668,
"B": -9.949518203735352,
"C": -14.192794799804688,
"D": -13.284774780273438,
"E": -10.620906829833984
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.551429748535156,
"scores": {
"A": -7.223015785217285,
"B": -8.682937622070312,
"C": -11.774445533752441,
"D": -8.85659408569336,
"E": -8.720208168029785
}
}
},
{
"ex_id": "aqua-test-230",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.39587879180908203,
"scores": {
"A": -9.808959007263184,
"B": -9.706989288330078,
"C": -10.10286808013916,
"D": -11.030524253845215,
"E": -11.224230766296387
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.8247079849243164,
"scores": {
"A": -7.614650726318359,
"B": -10.1624174118042,
"C": -10.439358711242676,
"D": -8.709356307983398,
"E": -9.494927406311035
}
}
},
{
"ex_id": "aqua-test-231",
"gold": "E",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -1.5809459686279297,
"scores": {
"A": -14.210000991821289,
"B": -11.604068756103516,
"C": -10.744209289550781,
"D": -14.023458480834961,
"E": -12.325155258178711
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.2639245986938477,
"scores": {
"A": -6.26756477355957,
"B": -7.757082939147949,
"C": -7.7208757400512695,
"D": -8.944690704345703,
"E": -8.531489372253418
}
}
},
{
"ex_id": "aqua-test-232",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.394481658935547,
"scores": {
"A": -10.429929733276367,
"B": -8.23812484741211,
"C": -10.50958251953125,
"D": -10.852899551391602,
"E": -11.632606506347656
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.812300682067871,
"scores": {
"A": -6.170710563659668,
"B": -8.007887840270996,
"C": -9.030508041381836,
"D": -8.483603477478027,
"E": -10.983011245727539
}
}
},
{
"ex_id": "aqua-test-233",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.781177520751953,
"scores": {
"A": -14.271334648132324,
"B": -11.490157127380371,
"C": -12.547819137573242,
"D": -12.209135055541992,
"E": -13.584470748901367
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 0.4316978454589844,
"scores": {
"A": -9.124231338500977,
"B": -9.750629425048828,
"C": -10.782835006713867,
"D": -9.555929183959961,
"E": -11.292634963989258
}
}
},
{
"ex_id": "aqua-test-234",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.7564897537231445,
"scores": {
"A": -11.012960433959961,
"B": -9.256470680236816,
"C": -11.204137802124023,
"D": -11.356718063354492,
"E": -11.360950469970703
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 3.251865863800049,
"scores": {
"A": -5.590026378631592,
"B": -8.84189224243164,
"C": -12.897504806518555,
"D": -9.304344177246094,
"E": -11.349357604980469
}
}
},
{
"ex_id": "aqua-test-235",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.0572433471679688,
"scores": {
"A": -11.001571655273438,
"B": -8.944328308105469,
"C": -10.169864654541016,
"D": -11.891605377197266,
"E": -10.417022705078125
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 1.4765863418579102,
"scores": {
"A": -7.271252632141113,
"B": -8.747838973999023,
"C": -9.36155891418457,
"D": -10.69479751586914,
"E": -9.831280708312988
}
}
},
{
"ex_id": "aqua-test-236",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.42516517639160156,
"scores": {
"A": -10.618110656738281,
"B": -10.19294548034668,
"C": -10.441062927246094,
"D": -13.496698379516602,
"E": -11.223958969116211
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 2.4833927154541016,
"scores": {
"A": -7.2196502685546875,
"B": -9.703042984008789,
"C": -9.82933235168457,
"D": -12.09399700164795,
"E": -12.26220989227295
}
}
},
{
"ex_id": "aqua-test-237",
"gold": "D",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -2.568614959716797,
"scores": {
"A": -14.478540420532227,
"B": -14.94182014465332,
"C": -18.723243713378906,
"D": -17.047155380249023,
"E": -17.837533950805664
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.232480049133301,
"scores": {
"A": -8.632494926452637,
"B": -12.854435920715332,
"C": -15.726949691772461,
"D": -11.864974975585938,
"E": -14.554712295532227
}
}
},
{
"ex_id": "aqua-test-238",
"gold": "B",
"baseline": {
"pred_label": "D",
"correct": false,
"margin": -0.7253379821777344,
"scores": {
"A": -12.960672378540039,
"B": -12.267729759216309,
"C": -12.900382995605469,
"D": -11.542391777038574,
"E": -13.208388328552246
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.6987123489379883,
"scores": {
"A": -8.835456848144531,
"B": -11.53416919708252,
"C": -12.405285835266113,
"D": -10.377281188964844,
"E": -12.166045188903809
}
}
},
{
"ex_id": "aqua-test-239",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -1.5075511932373047,
"scores": {
"A": -10.213048934936523,
"B": -9.266622543334961,
"C": -8.705497741699219,
"D": -12.055715560913086,
"E": -11.688860893249512
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 3.3363800048828125,
"scores": {
"A": -6.8600263595581055,
"B": -10.196406364440918,
"C": -11.205855369567871,
"D": -13.323162078857422,
"E": -12.765695571899414
}
}
},
{
"ex_id": "aqua-test-240",
"gold": "E",
"baseline": {
"pred_label": "D",
"correct": false,
"margin": -0.44985103607177734,
"scores": {
"A": -15.143648147583008,
"B": -14.28840446472168,
"C": -18.351917266845703,
"D": -14.222793579101562,
"E": -14.67264461517334
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.2502288818359375,
"scores": {
"A": -9.40713882446289,
"B": -11.652059555053711,
"C": -14.1944580078125,
"D": -11.099084854125977,
"E": -12.657367706298828
}
}
},
{
"ex_id": "aqua-test-241",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -4.373614311218262,
"scores": {
"A": -10.808015823364258,
"B": -8.874855041503906,
"C": -10.419958114624023,
"D": -13.248469352722168,
"E": -12.38242244720459
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.841573715209961,
"scores": {
"A": -8.555763244628906,
"B": -12.585406303405762,
"C": -12.206342697143555,
"D": -12.397336959838867,
"E": -11.421467781066895
}
}
},
{
"ex_id": "aqua-test-242",
"gold": "C",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -2.8799476623535156,
"scores": {
"A": -9.328302383422852,
"B": -10.89498519897461,
"C": -12.208250045776367,
"D": -12.965248107910156,
"E": -11.71044921875
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -7.6321868896484375,
"scores": {
"A": -6.5676727294921875,
"B": -11.704992294311523,
"C": -14.199859619140625,
"D": -14.442285537719727,
"E": -14.39041519165039
}
}
},
{
"ex_id": "aqua-test-243",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -6.371611595153809,
"scores": {
"A": -15.65360164642334,
"B": -9.281990051269531,
"C": -11.275196075439453,
"D": -13.858661651611328,
"E": -12.75399398803711
}
},
"ablated": {
"pred_label": "B",
"correct": false,
"margin": -1.5444035530090332,
"scores": {
"A": -8.703460693359375,
"B": -7.159057140350342,
"C": -9.622814178466797,
"D": -11.823188781738281,
"E": -12.516307830810547
}
}
},
{
"ex_id": "aqua-test-244",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.365335464477539,
"scores": {
"A": -13.74870491027832,
"B": -12.62894058227539,
"C": -11.383369445800781,
"D": -12.943933486938477,
"E": -12.694005966186523
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 3.0395584106445312,
"scores": {
"A": -6.322815895080566,
"B": -9.362374305725098,
"C": -10.0859956741333,
"D": -10.893784523010254,
"E": -12.42809772491455
}
}
},
{
"ex_id": "aqua-test-245",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.3645496368408203,
"scores": {
"A": -11.810342788696289,
"B": -11.149602890014648,
"C": -11.514152526855469,
"D": -13.367142677307129,
"E": -13.141554832458496
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.503069877624512,
"scores": {
"A": -7.401782989501953,
"B": -13.384897232055664,
"C": -13.904852867126465,
"D": -14.09384822845459,
"E": -15.8029146194458
}
}
},
{
"ex_id": "aqua-test-246",
"gold": "B",
"baseline": {
"pred_label": "E",
"correct": false,
"margin": -1.197403907775879,
"scores": {
"A": -13.818582534790039,
"B": -14.779563903808594,
"C": -15.105998992919922,
"D": -14.267341613769531,
"E": -13.582159996032715
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.314557075500488,
"scores": {
"A": -9.649901390075684,
"B": -13.964458465576172,
"C": -16.527318954467773,
"D": -12.236238479614258,
"E": -13.367414474487305
}
}
},
{
"ex_id": "aqua-test-247",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -4.482232093811035,
"scores": {
"A": -15.076606750488281,
"B": -10.594374656677246,
"C": -13.674264907836914,
"D": -13.522222518920898,
"E": -13.801025390625
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 0.8972492218017578,
"scores": {
"A": -9.801979064941406,
"B": -11.38330078125,
"C": -12.110631942749023,
"D": -10.699228286743164,
"E": -12.660514831542969
}
}
},
{
"ex_id": "aqua-test-248",
"gold": "D",
"baseline": {
"pred_label": "E",
"correct": false,
"margin": -3.480362892150879,
"scores": {
"A": -14.004387855529785,
"B": -13.113205909729004,
"C": -13.897481918334961,
"D": -15.721760749816895,
"E": -12.241397857666016
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.1865768432617188,
"scores": {
"A": -8.702154159545898,
"B": -9.582113265991211,
"C": -11.823348999023438,
"D": -11.888731002807617,
"E": -9.788232803344727
}
}
},
{
"ex_id": "aqua-test-249",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.821441650390625,
"scores": {
"A": -10.321834564208984,
"B": -8.848502159118652,
"C": -8.027060508728027,
"D": -11.628623962402344,
"E": -11.091792106628418
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.602280616760254,
"scores": {
"A": -6.007650375366211,
"B": -7.634098052978516,
"C": -8.609930992126465,
"D": -8.443798065185547,
"E": -8.685563087463379
}
}
},
{
"ex_id": "aqua-test-250",
"gold": "E",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -1.9709901809692383,
"scores": {
"A": -10.511480331420898,
"B": -11.609233856201172,
"C": -10.008577346801758,
"D": -14.576160430908203,
"E": -11.979567527770996
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -7.598491668701172,
"scores": {
"A": -8.06104850769043,
"B": -11.577505111694336,
"C": -12.608949661254883,
"D": -14.756206512451172,
"E": -15.659540176391602
}
}
},
{
"ex_id": "aqua-test-251",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.07524585723876953,
"scores": {
"A": -9.908409118652344,
"B": -9.833163261413574,
"C": -12.424334526062012,
"D": -11.275071144104004,
"E": -10.72103214263916
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.978281021118164,
"scores": {
"A": -9.318894386291504,
"B": -12.297175407409668,
"C": -13.513100624084473,
"D": -12.114720344543457,
"E": -11.161179542541504
}
}
},
{
"ex_id": "aqua-test-252",
"gold": "C",
"baseline": {
"pred_label": "E",
"correct": false,
"margin": -0.18950843811035156,
"scores": {
"A": -10.37520980834961,
"B": -9.517382621765137,
"C": -9.654356002807617,
"D": -10.586039543151855,
"E": -9.464847564697266
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.239790916442871,
"scores": {
"A": -6.840622901916504,
"B": -8.500364303588867,
"C": -9.080413818359375,
"D": -9.140447616577148,
"E": -8.753503799438477
}
}
},
{
"ex_id": "aqua-test-253",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.153468132019043,
"scores": {
"A": -12.344278335571289,
"B": -10.064801216125488,
"C": -10.924477577209473,
"D": -12.967808723449707,
"E": -13.218269348144531
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -7.156527519226074,
"scores": {
"A": -4.787992477416992,
"B": -6.941324234008789,
"C": -8.889881134033203,
"D": -9.577656745910645,
"E": -11.944519996643066
}
}
}
],
"flip_rows": [
{
"ex_id": "aqua-test-2",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.2555389404296875,
"scores": {
"A": -11.233211517333984,
"B": -10.210750579833984,
"C": -13.17569351196289,
"D": -12.437894821166992,
"E": -10.466289520263672
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -7.949008941650391,
"scores": {
"A": -6.06699275970459,
"B": -14.01600170135498,
"C": -17.137845993041992,
"D": -15.27363109588623,
"E": -15.64785099029541
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 1.3901653289794922,
"scores": {
"A": -9.876066207885742,
"B": -8.48590087890625,
"C": -10.311349868774414,
"D": -10.88787841796875,
"E": -10.712956428527832
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 0.25553417205810547,
"scores": {
"A": -11.233206748962402,
"B": -10.210748672485352,
"C": -13.175691604614258,
"D": -12.437891006469727,
"E": -10.466282844543457
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 0.25553417205810547,
"scores": {
"A": -11.233206748962402,
"B": -10.210748672485352,
"C": -13.175691604614258,
"D": -12.437891006469727,
"E": -10.466282844543457
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -9.454591751098633,
"scores": {
"A": -5.419614791870117,
"B": -14.87420654296875,
"C": -18.10893440246582,
"D": -16.861085891723633,
"E": -17.0190372467041
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 1.7489757537841797,
"scores": {
"A": -10.085409164428711,
"B": -8.336433410644531,
"C": -10.132183074951172,
"D": -10.820955276489258,
"E": -10.653312683105469
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -6.734705924987793,
"scores": {
"A": -4.145016670227051,
"B": -10.879722595214844,
"C": -13.257842063903809,
"D": -13.037062644958496,
"E": -12.864790916442871
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -7.94901180267334,
"scores": {
"A": -6.066986083984375,
"B": -14.015997886657715,
"C": -17.13784408569336,
"D": -15.273627281188965,
"E": -15.647849082946777
}
}
},
{
"ex_id": "aqua-test-5",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.953394889831543,
"scores": {
"A": -11.989723205566406,
"B": -10.97428035736084,
"C": -12.035185813903809,
"D": -11.961091041564941,
"E": -11.927675247192383
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.1877222061157227,
"scores": {
"A": -7.596570014953613,
"B": -9.784292221069336,
"C": -11.036355018615723,
"D": -9.200647354125977,
"E": -10.078826904296875
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 0.16080760955810547,
"scores": {
"A": -9.10636043548584,
"B": -8.253414154052734,
"C": -8.41422176361084,
"D": -9.317205429077148,
"E": -9.607017517089844
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 0.9533920288085938,
"scores": {
"A": -11.989713668823242,
"B": -10.974275588989258,
"C": -12.035181045532227,
"D": -11.96108627319336,
"E": -11.927667617797852
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 0.9533920288085938,
"scores": {
"A": -11.989713668823242,
"B": -10.974275588989258,
"C": -12.035181045532227,
"D": -11.96108627319336,
"E": -11.927667617797852
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -1.1618080139160156,
"scores": {
"A": -4.276651382446289,
"B": -5.438459396362305,
"C": -7.316925048828125,
"D": -5.728630065917969,
"E": -5.395031929016113
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 0.10317325592041016,
"scores": {
"A": -9.095845222473145,
"B": -8.302581787109375,
"C": -8.405755043029785,
"D": -9.327710151672363,
"E": -9.620680809020996
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -2.9572091102600098,
"scores": {
"A": -7.572333812713623,
"B": -10.529542922973633,
"C": -11.914779663085938,
"D": -11.758302688598633,
"E": -10.997583389282227
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -2.1877198219299316,
"scores": {
"A": -7.59656286239624,
"B": -9.784282684326172,
"C": -11.036344528198242,
"D": -9.200637817382812,
"E": -10.078821182250977
}
}
},
{
"ex_id": "aqua-test-9",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.0833330154418945,
"scores": {
"A": -11.261035919189453,
"B": -8.873366355895996,
"C": -9.95669937133789,
"D": -12.33233642578125,
"E": -13.964797973632812
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.0129852294921875,
"scores": {
"A": -7.305376052856445,
"B": -11.318361282348633,
"C": -11.48718547821045,
"D": -13.66738224029541,
"E": -15.269938468933105
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 1.1214942932128906,
"scores": {
"A": -10.059557914733887,
"B": -7.437822341918945,
"C": -8.559316635131836,
"D": -10.860220909118652,
"E": -12.348688125610352
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 1.0833320617675781,
"scores": {
"A": -11.26103401184082,
"B": -8.873364448547363,
"C": -9.956696510314941,
"D": -12.332334518432617,
"E": -13.964792251586914
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 1.0833320617675781,
"scores": {
"A": -11.26103401184082,
"B": -8.873364448547363,
"C": -9.956696510314941,
"D": -12.332334518432617,
"E": -13.964792251586914
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -3.571709632873535,
"scores": {
"A": -4.2528157234191895,
"B": -7.824525356292725,
"C": -7.8429999351501465,
"D": -10.935527801513672,
"E": -12.20101547241211
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 1.0738887786865234,
"scores": {
"A": -9.967976570129395,
"B": -7.351049423217773,
"C": -8.424938201904297,
"D": -10.737732887268066,
"E": -12.164700508117676
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -3.493661880493164,
"scores": {
"A": -6.91084098815918,
"B": -10.404502868652344,
"C": -11.570510864257812,
"D": -13.403081893920898,
"E": -13.38132095336914
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -4.012986183166504,
"scores": {
"A": -7.3053789138793945,
"B": -11.318365097045898,
"C": -11.487188339233398,
"D": -13.667381286621094,
"E": -15.269935607910156
}
}
},
{
"ex_id": "aqua-test-15",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.6604747772216797,
"scores": {
"A": -11.07632064819336,
"B": -10.41584587097168,
"C": -13.610551834106445,
"D": -15.297096252441406,
"E": -13.782489776611328
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.743229389190674,
"scores": {
"A": -6.1119704246521,
"B": -10.855199813842773,
"C": -11.251523971557617,
"D": -11.053302764892578,
"E": -13.566537857055664
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 0.7353744506835938,
"scores": {
"A": -9.318084716796875,
"B": -8.582710266113281,
"C": -10.295574188232422,
"D": -11.628917694091797,
"E": -11.761164665222168
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 0.6604728698730469,
"scores": {
"A": -11.076318740844727,
"B": -10.41584587097168,
"C": -13.610550880432129,
"D": -15.297094345092773,
"E": -13.782489776611328
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 0.6604728698730469,
"scores": {
"A": -11.076318740844727,
"B": -10.41584587097168,
"C": -13.610550880432129,
"D": -15.297094345092773,
"E": -13.782489776611328
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -3.7481865882873535,
"scores": {
"A": -7.6179327964782715,
"B": -11.366119384765625,
"C": -11.271610260009766,
"D": -11.955974578857422,
"E": -14.104389190673828
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 0.4087352752685547,
"scores": {
"A": -9.301450729370117,
"B": -8.892715454101562,
"C": -10.551101684570312,
"D": -11.64991569519043,
"E": -11.84024429321289
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -3.2450222969055176,
"scores": {
"A": -5.506385326385498,
"B": -8.751407623291016,
"C": -10.76029109954834,
"D": -10.876399040222168,
"E": -11.42264461517334
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -4.743227005004883,
"scores": {
"A": -6.111969947814941,
"B": -10.855196952819824,
"C": -11.251523971557617,
"D": -11.053295135498047,
"E": -13.566534042358398
}
}
},
{
"ex_id": "aqua-test-16",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 2.796067237854004,
"scores": {
"A": -12.479905128479004,
"B": -10.507231712341309,
"C": -7.711164474487305,
"D": -12.827747344970703,
"E": -12.807977676391602
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.861086368560791,
"scores": {
"A": -7.834758281707764,
"B": -9.467061996459961,
"C": -8.695844650268555,
"D": -9.597942352294922,
"E": -11.696287155151367
}
},
"patched_0": {
"pred_label": "C",
"correct": true,
"margin": 1.3996143341064453,
"scores": {
"A": -10.024466514587402,
"B": -9.449155807495117,
"C": -8.049541473388672,
"D": -10.041764259338379,
"E": -11.3864164352417
}
},
"patched_01": {
"pred_label": "C",
"correct": true,
"margin": 2.7960658073425293,
"scores": {
"A": -12.479902267456055,
"B": -10.507226943969727,
"C": -7.711161136627197,
"D": -12.827741622924805,
"E": -12.807975769042969
}
},
"patched_full": {
"pred_label": "C",
"correct": true,
"margin": 2.7960658073425293,
"scores": {
"A": -12.479902267456055,
"B": -10.507226943969727,
"C": -7.711161136627197,
"D": -12.827741622924805,
"E": -12.807975769042969
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -1.8937911987304688,
"scores": {
"A": -9.395095825195312,
"B": -11.238801956176758,
"C": -11.288887023925781,
"D": -11.238336563110352,
"E": -13.501079559326172
}
},
"control_time_shuffled": {
"pred_label": "C",
"correct": true,
"margin": 1.4178781509399414,
"scores": {
"A": -9.956263542175293,
"B": -9.215209007263184,
"C": -7.797330856323242,
"D": -9.89533805847168,
"E": -11.219152450561523
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -0.8867502212524414,
"scores": {
"A": -6.518403053283691,
"B": -7.824748992919922,
"C": -7.405153274536133,
"D": -6.778932571411133,
"E": -8.86016845703125
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -0.8610877990722656,
"scores": {
"A": -7.83476448059082,
"B": -9.467066764831543,
"C": -8.695852279663086,
"D": -9.597952842712402,
"E": -11.696298599243164
}
}
},
{
"ex_id": "aqua-test-21",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.7691888809204102,
"scores": {
"A": -10.39490795135498,
"B": -9.62571907043457,
"C": -12.538268089294434,
"D": -12.220020294189453,
"E": -11.351235389709473
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.5374608039855957,
"scores": {
"A": -6.129680156707764,
"B": -9.66714096069336,
"C": -10.800978660583496,
"D": -10.67288875579834,
"E": -11.187762260437012
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 1.620218276977539,
"scores": {
"A": -9.997575759887695,
"B": -8.377357482910156,
"C": -10.22830867767334,
"D": -10.391581535339355,
"E": -11.034626960754395
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 0.7691860198974609,
"scores": {
"A": -10.394902229309082,
"B": -9.625716209411621,
"C": -12.538259506225586,
"D": -12.220011711120605,
"E": -11.351226806640625
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 0.7691860198974609,
"scores": {
"A": -10.394902229309082,
"B": -9.625716209411621,
"C": -12.538259506225586,
"D": -12.220011711120605,
"E": -11.351226806640625
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -4.422626972198486,
"scores": {
"A": -5.9301066398620605,
"B": -10.352733612060547,
"C": -10.554861068725586,
"D": -10.926000595092773,
"E": -12.400789260864258
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 1.7119264602661133,
"scores": {
"A": -10.018722534179688,
"B": -8.272918701171875,
"C": -9.984845161437988,
"D": -10.223934173583984,
"E": -10.815324783325195
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -2.364543914794922,
"scores": {
"A": -10.713298797607422,
"B": -13.077842712402344,
"C": -13.290660858154297,
"D": -12.429567337036133,
"E": -14.533975601196289
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -3.5374622344970703,
"scores": {
"A": -6.129676818847656,
"B": -9.667139053344727,
"C": -10.800968170166016,
"D": -10.67288589477539,
"E": -11.18775463104248
}
}
},
{
"ex_id": "aqua-test-25",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.06520843505859375,
"scores": {
"A": -12.949111938476562,
"B": -12.246522903442383,
"C": -12.181314468383789,
"D": -12.397541046142578,
"E": -13.614669799804688
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.5791339874267578,
"scores": {
"A": -8.643856048583984,
"B": -10.894746780395508,
"C": -10.222990036010742,
"D": -9.472063064575195,
"E": -10.86764144897461
}
},
"patched_0": {
"pred_label": "B",
"correct": false,
"margin": -0.1788043975830078,
"scores": {
"A": -9.795440673828125,
"B": -8.286870002746582,
"C": -8.46567440032959,
"D": -9.396930694580078,
"E": -10.46731185913086
}
},
"patched_01": {
"pred_label": "C",
"correct": true,
"margin": 0.06521415710449219,
"scores": {
"A": -12.949119567871094,
"B": -12.246532440185547,
"C": -12.181318283081055,
"D": -12.397550582885742,
"E": -13.614681243896484
}
},
"patched_full": {
"pred_label": "C",
"correct": true,
"margin": 0.06521415710449219,
"scores": {
"A": -12.949119567871094,
"B": -12.246532440185547,
"C": -12.181318283081055,
"D": -12.397550582885742,
"E": -13.614681243896484
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -1.7965564727783203,
"scores": {
"A": -5.875622272491455,
"B": -8.843538284301758,
"C": -7.672178745269775,
"D": -7.943771839141846,
"E": -9.51207447052002
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": false,
"margin": -0.07470321655273438,
"scores": {
"A": -9.802331924438477,
"B": -8.519807815551758,
"C": -8.594511032104492,
"D": -9.518583297729492,
"E": -10.49337387084961
}
},
"control_shared_randvec": {
"pred_label": "C",
"correct": true,
"margin": 0.5046224594116211,
"scores": {
"A": -7.623855113983154,
"B": -8.038482666015625,
"C": -6.829197406768799,
"D": -7.33381986618042,
"E": -8.407615661621094
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -1.579136848449707,
"scores": {
"A": -8.643851280212402,
"B": -10.894744873046875,
"C": -10.22298812866211,
"D": -9.472061157226562,
"E": -10.86764144897461
}
}
},
{
"ex_id": "aqua-test-33",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 1.2154502868652344,
"scores": {
"A": -17.279247283935547,
"B": -18.187232971191406,
"C": -16.063796997070312,
"D": -19.143869400024414,
"E": -19.470874786376953
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.4487724304199219,
"scores": {
"A": -9.145519256591797,
"B": -10.157659530639648,
"C": -9.594291687011719,
"D": -10.095281600952148,
"E": -10.523807525634766
}
},
"patched_0": {
"pred_label": "C",
"correct": true,
"margin": 0.9382915496826172,
"scores": {
"A": -10.902482032775879,
"B": -9.475619316101074,
"C": -8.537327766418457,
"D": -10.088809967041016,
"E": -11.045086860656738
}
},
"patched_01": {
"pred_label": "C",
"correct": true,
"margin": 1.2154521942138672,
"scores": {
"A": -17.27924156188965,
"B": -18.187225341796875,
"C": -16.06378936767578,
"D": -19.143863677978516,
"E": -19.470867156982422
}
},
"patched_full": {
"pred_label": "C",
"correct": true,
"margin": 1.2154521942138672,
"scores": {
"A": -17.27924156188965,
"B": -18.187225341796875,
"C": -16.06378936767578,
"D": -19.143863677978516,
"E": -19.470867156982422
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -0.7954683303833008,
"scores": {
"A": -6.031587600708008,
"B": -6.501745223999023,
"C": -6.827055931091309,
"D": -8.70362663269043,
"E": -9.60076904296875
}
},
"control_time_shuffled": {
"pred_label": "C",
"correct": true,
"margin": 0.8526973724365234,
"scores": {
"A": -10.536826133728027,
"B": -8.904891014099121,
"C": -8.052193641662598,
"D": -9.56973648071289,
"E": -10.570178031921387
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -3.2699460983276367,
"scores": {
"A": -6.845300674438477,
"B": -7.745532989501953,
"C": -10.115246772766113,
"D": -9.863080024719238,
"E": -9.416638374328613
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -0.44877052307128906,
"scores": {
"A": -9.145517349243164,
"B": -10.157659530639648,
"C": -9.594287872314453,
"D": -10.095277786254883,
"E": -10.523809432983398
}
}
},
{
"ex_id": "aqua-test-39",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 1.704728126525879,
"scores": {
"A": -10.207995414733887,
"B": -11.912723541259766,
"C": -12.109935760498047,
"D": -14.276583671569824,
"E": -13.992156982421875
}
},
"ablated": {
"pred_label": "C",
"correct": false,
"margin": -0.4076976776123047,
"scores": {
"A": -10.194977760314941,
"B": -10.153923988342285,
"C": -9.787280082702637,
"D": -11.554168701171875,
"E": -10.806174278259277
}
},
"patched_0": {
"pred_label": "C",
"correct": false,
"margin": -0.2574920654296875,
"scores": {
"A": -9.42642593383789,
"B": -9.74307632446289,
"C": -9.168933868408203,
"D": -10.273834228515625,
"E": -9.463874816894531
}
},
"patched_01": {
"pred_label": "A",
"correct": true,
"margin": 1.7047252655029297,
"scores": {
"A": -10.207992553710938,
"B": -11.912717819213867,
"C": -12.109930038452148,
"D": -14.276582717895508,
"E": -13.99215316772461
}
},
"patched_full": {
"pred_label": "A",
"correct": true,
"margin": 1.7047252655029297,
"scores": {
"A": -10.207992553710938,
"B": -11.912717819213867,
"C": -12.109930038452148,
"D": -14.276582717895508,
"E": -13.99215316772461
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": true,
"margin": 0.4817485809326172,
"scores": {
"A": -8.8492431640625,
"B": -9.330991744995117,
"C": -9.828338623046875,
"D": -11.093040466308594,
"E": -10.96759033203125
}
},
"control_time_shuffled": {
"pred_label": "C",
"correct": false,
"margin": -0.15412521362304688,
"scores": {
"A": -9.60675048828125,
"B": -10.024761199951172,
"C": -9.452625274658203,
"D": -10.476186752319336,
"E": -9.598836898803711
}
},
"control_shared_randvec": {
"pred_label": "C",
"correct": false,
"margin": -0.17638206481933594,
"scores": {
"A": -9.201507568359375,
"B": -9.28551959991455,
"C": -9.025125503540039,
"D": -10.745898246765137,
"E": -10.075105667114258
}
},
"control_patch_nonshared": {
"pred_label": "C",
"correct": false,
"margin": -0.4076995849609375,
"scores": {
"A": -10.194982528686523,
"B": -10.153924942016602,
"C": -9.787282943725586,
"D": -11.554170608520508,
"E": -10.806177139282227
}
}
},
{
"ex_id": "aqua-test-47",
"gold": "E",
"baseline": {
"pred_label": "E",
"correct": true,
"margin": 0.20550537109375,
"scores": {
"A": -11.954267501831055,
"B": -12.503751754760742,
"C": -12.114371299743652,
"D": -13.045472145080566,
"E": -11.748762130737305
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.0157623291015625,
"scores": {
"A": -9.386420249938965,
"B": -11.835212707519531,
"C": -13.338075637817383,
"D": -12.148918151855469,
"E": -13.402182579040527
}
},
"patched_0": {
"pred_label": "C",
"correct": false,
"margin": -0.8217716217041016,
"scores": {
"A": -11.986827850341797,
"B": -12.437498092651367,
"C": -11.280074119567871,
"D": -12.387033462524414,
"E": -12.101845741271973
}
},
"patched_01": {
"pred_label": "E",
"correct": true,
"margin": 0.20550537109375,
"scores": {
"A": -11.954263687133789,
"B": -12.503748893737793,
"C": -12.114365577697754,
"D": -13.045466423034668,
"E": -11.748758316040039
}
},
"patched_full": {
"pred_label": "E",
"correct": true,
"margin": 0.20550537109375,
"scores": {
"A": -11.954263687133789,
"B": -12.503748893737793,
"C": -12.114365577697754,
"D": -13.045466423034668,
"E": -11.748758316040039
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -3.8673534393310547,
"scores": {
"A": -8.755905151367188,
"B": -11.388154983520508,
"C": -13.46282958984375,
"D": -11.259201049804688,
"E": -12.623258590698242
}
},
"control_time_shuffled": {
"pred_label": "C",
"correct": false,
"margin": -0.8881673812866211,
"scores": {
"A": -11.76992416381836,
"B": -12.467824935913086,
"C": -11.171812057495117,
"D": -12.422957420349121,
"E": -12.059979438781738
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -3.76485538482666,
"scores": {
"A": -6.755273818969727,
"B": -9.245403289794922,
"C": -11.986200332641602,
"D": -10.68335247039795,
"E": -10.520129203796387
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -4.0157623291015625,
"scores": {
"A": -9.386425018310547,
"B": -11.835214614868164,
"C": -13.338083267211914,
"D": -12.148921966552734,
"E": -13.40218734741211
}
}
},
{
"ex_id": "aqua-test-52",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.04578971862792969,
"scores": {
"A": -12.882274627685547,
"B": -9.855215072631836,
"C": -9.901004791259766,
"D": -11.499755859375,
"E": -10.678110122680664
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.4922494888305664,
"scores": {
"A": -5.136632442474365,
"B": -5.628881931304932,
"C": -6.605200290679932,
"D": -6.88695764541626,
"E": -6.429419994354248
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 0.19986343383789062,
"scores": {
"A": -10.225756645202637,
"B": -6.857089042663574,
"C": -7.056952476501465,
"D": -9.577616691589355,
"E": -9.770869255065918
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 0.04578971862792969,
"scores": {
"A": -12.882272720336914,
"B": -9.855213165283203,
"C": -9.901002883911133,
"D": -11.499753952026367,
"E": -10.678108215332031
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 0.04578971862792969,
"scores": {
"A": -12.882272720336914,
"B": -9.855213165283203,
"C": -9.901002883911133,
"D": -11.499753952026367,
"E": -10.678108215332031
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "B",
"correct": true,
"margin": 0.052666664123535156,
"scores": {
"A": -9.966214179992676,
"B": -9.754355430603027,
"C": -12.090447425842285,
"D": -10.788393020629883,
"E": -9.807022094726562
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 0.09168434143066406,
"scores": {
"A": -10.196533203125,
"B": -6.679924488067627,
"C": -6.771608829498291,
"D": -9.311458587646484,
"E": -9.494468688964844
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -0.650324821472168,
"scores": {
"A": -10.846677780151367,
"B": -11.497002601623535,
"C": -15.689571380615234,
"D": -14.434303283691406,
"E": -13.255485534667969
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -0.4922494888305664,
"scores": {
"A": -5.136631488800049,
"B": -5.628880977630615,
"C": -6.605197429656982,
"D": -6.886956691741943,
"E": -6.429421901702881
}
}
},
{
"ex_id": "aqua-test-57",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.14557647705078125,
"scores": {
"A": -13.898555755615234,
"B": -12.992910385131836,
"C": -14.371723175048828,
"D": -14.158893585205078,
"E": -13.138486862182617
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.6548147201538086,
"scores": {
"A": -10.324930191040039,
"B": -12.979744911193848,
"C": -12.848653793334961,
"D": -12.86312484741211,
"E": -12.547582626342773
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 0.3462409973144531,
"scores": {
"A": -12.841978073120117,
"B": -12.433549880981445,
"C": -13.434564590454102,
"D": -13.763471603393555,
"E": -12.779790878295898
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 0.14557647705078125,
"scores": {
"A": -13.898560523986816,
"B": -12.992914199829102,
"C": -14.371731758117676,
"D": -14.158897399902344,
"E": -13.138490676879883
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 0.14557647705078125,
"scores": {
"A": -13.898560523986816,
"B": -12.992914199829102,
"C": -14.371731758117676,
"D": -14.158897399902344,
"E": -13.138490676879883
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -1.6947002410888672,
"scores": {
"A": -8.799111366271973,
"B": -10.49381160736084,
"C": -10.079784393310547,
"D": -11.49155044555664,
"E": -11.507024765014648
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 0.21564388275146484,
"scores": {
"A": -12.912389755249023,
"B": -12.696745872497559,
"C": -13.669480323791504,
"D": -13.951339721679688,
"E": -12.927538871765137
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -1.7318696975708008,
"scores": {
"A": -10.47717571258545,
"B": -12.20904541015625,
"C": -12.458807945251465,
"D": -13.046340942382812,
"E": -11.932548522949219
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -2.6548194885253906,
"scores": {
"A": -10.324928283691406,
"B": -12.979747772216797,
"C": -12.848652839660645,
"D": -12.863126754760742,
"E": -12.547584533691406
}
}
},
{
"ex_id": "aqua-test-68",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.6945219039916992,
"scores": {
"A": -11.65401840209961,
"B": -10.95949649810791,
"C": -11.869510650634766,
"D": -12.070514678955078,
"E": -12.618841171264648
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.171473503112793,
"scores": {
"A": -9.029966354370117,
"B": -11.20143985748291,
"C": -11.244144439697266,
"D": -11.500038146972656,
"E": -10.598958015441895
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 0.03911018371582031,
"scores": {
"A": -10.191217422485352,
"B": -9.777387619018555,
"C": -9.816497802734375,
"D": -11.066892623901367,
"E": -10.560269355773926
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 0.6945209503173828,
"scores": {
"A": -11.654026985168457,
"B": -10.959506034851074,
"C": -11.869518280029297,
"D": -12.07052230834961,
"E": -12.61884593963623
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 0.6945209503173828,
"scores": {
"A": -11.654026985168457,
"B": -10.959506034851074,
"C": -11.869518280029297,
"D": -12.07052230834961,
"E": -12.61884593963623
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -2.0571775436401367,
"scores": {
"A": -8.271219253540039,
"B": -10.328396797180176,
"C": -9.616171836853027,
"D": -10.940016746520996,
"E": -9.240631103515625
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 0.05071544647216797,
"scores": {
"A": -10.167059898376465,
"B": -9.610998153686523,
"C": -9.661713600158691,
"D": -11.012078285217285,
"E": -10.494278907775879
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -1.482576847076416,
"scores": {
"A": -6.736016750335693,
"B": -8.21859359741211,
"C": -8.19207763671875,
"D": -8.997150421142578,
"E": -7.911620616912842
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -2.1714725494384766,
"scores": {
"A": -9.029961585998535,
"B": -11.201434135437012,
"C": -11.244138717651367,
"D": -11.50003433227539,
"E": -10.598953247070312
}
}
},
{
"ex_id": "aqua-test-78",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 3.0808143615722656,
"scores": {
"A": -12.794174194335938,
"B": -8.323003768920898,
"C": -11.403818130493164,
"D": -13.768218994140625,
"E": -13.847496032714844
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.647706031799316,
"scores": {
"A": -5.634004592895508,
"B": -10.281710624694824,
"C": -11.297346115112305,
"D": -12.075166702270508,
"E": -12.413890838623047
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 1.6711091995239258,
"scores": {
"A": -10.68982982635498,
"B": -7.3767290115356445,
"C": -9.04783821105957,
"D": -11.847247123718262,
"E": -11.526897430419922
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 3.0808143615722656,
"scores": {
"A": -12.794168472290039,
"B": -8.322998046875,
"C": -11.403812408447266,
"D": -13.768211364746094,
"E": -13.847491264343262
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 3.0808143615722656,
"scores": {
"A": -12.794168472290039,
"B": -8.322998046875,
"C": -11.403812408447266,
"D": -13.768211364746094,
"E": -13.847491264343262
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -3.6936893463134766,
"scores": {
"A": -5.412115097045898,
"B": -9.105804443359375,
"C": -10.826802253723145,
"D": -11.795136451721191,
"E": -9.426908493041992
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 1.8150687217712402,
"scores": {
"A": -11.064653396606445,
"B": -7.3195881843566895,
"C": -9.13465690612793,
"D": -11.857183456420898,
"E": -11.807918548583984
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -2.0817832946777344,
"scores": {
"A": -6.042266845703125,
"B": -8.12405014038086,
"C": -8.708685874938965,
"D": -9.721263885498047,
"E": -7.989676475524902
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -4.647706985473633,
"scores": {
"A": -5.633998870849609,
"B": -10.281705856323242,
"C": -11.297343254089355,
"D": -12.075161933898926,
"E": -12.413885116577148
}
}
},
{
"ex_id": "aqua-test-87",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.6314544677734375,
"scores": {
"A": -9.793952941894531,
"B": -9.162498474121094,
"C": -11.231021881103516,
"D": -12.002910614013672,
"E": -11.467964172363281
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.9217519760131836,
"scores": {
"A": -6.585877418518066,
"B": -9.50762939453125,
"C": -9.712257385253906,
"D": -9.212251663208008,
"E": -11.261186599731445
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 1.4228715896606445,
"scores": {
"A": -9.903858184814453,
"B": -8.480986595153809,
"C": -10.398112297058105,
"D": -10.929282188415527,
"E": -11.117732048034668
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 0.6314525604248047,
"scores": {
"A": -9.793954849243164,
"B": -9.16250228881836,
"C": -11.231022834777832,
"D": -12.002912521362305,
"E": -11.467966079711914
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 0.6314525604248047,
"scores": {
"A": -9.793954849243164,
"B": -9.16250228881836,
"C": -11.231022834777832,
"D": -12.002912521362305,
"E": -11.467966079711914
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -2.8357162475585938,
"scores": {
"A": -4.554224967956543,
"B": -7.389941215515137,
"C": -8.258695602416992,
"D": -7.952755928039551,
"E": -9.503331184387207
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 1.1273679733276367,
"scores": {
"A": -9.8775634765625,
"B": -8.750195503234863,
"C": -10.547350883483887,
"D": -10.983156204223633,
"E": -11.062116622924805
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -2.535149574279785,
"scores": {
"A": -3.4644904136657715,
"B": -5.999639987945557,
"C": -6.94714879989624,
"D": -6.253420352935791,
"E": -7.40266752243042
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -2.921750068664551,
"scores": {
"A": -6.585874557495117,
"B": -9.507624626159668,
"C": -9.712258338928223,
"D": -9.212246894836426,
"E": -11.261183738708496
}
}
},
{
"ex_id": "aqua-test-100",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.1513805389404297,
"scores": {
"A": -9.272323608398438,
"B": -9.739631652832031,
"C": -9.120943069458008,
"D": -10.063505172729492,
"E": -10.608749389648438
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.166820049285889,
"scores": {
"A": -4.778280735015869,
"B": -9.417329788208008,
"C": -10.945100784301758,
"D": -11.501747131347656,
"E": -13.226821899414062
}
},
"patched_0": {
"pred_label": "C",
"correct": true,
"margin": 0.42541027069091797,
"scores": {
"A": -8.251370429992676,
"B": -8.534682273864746,
"C": -7.825960159301758,
"D": -9.516815185546875,
"E": -10.607525825500488
}
},
"patched_01": {
"pred_label": "C",
"correct": true,
"margin": 0.1513843536376953,
"scores": {
"A": -9.272323608398438,
"B": -9.739627838134766,
"C": -9.120939254760742,
"D": -10.063497543334961,
"E": -10.608743667602539
}
},
"patched_full": {
"pred_label": "C",
"correct": true,
"margin": 0.1513843536376953,
"scores": {
"A": -9.272323608398438,
"B": -9.739627838134766,
"C": -9.120939254760742,
"D": -10.063497543334961,
"E": -10.608743667602539
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -5.7572784423828125,
"scores": {
"A": -5.2430009841918945,
"B": -9.474717140197754,
"C": -11.000279426574707,
"D": -10.994614601135254,
"E": -11.884474754333496
}
},
"control_time_shuffled": {
"pred_label": "C",
"correct": true,
"margin": 0.5618510246276855,
"scores": {
"A": -8.301986694335938,
"B": -8.291629791259766,
"C": -7.72977876663208,
"D": -9.342042922973633,
"E": -10.563093185424805
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -6.407781600952148,
"scores": {
"A": -5.431789398193359,
"B": -11.169084548950195,
"C": -11.839570999145508,
"D": -14.098028182983398,
"E": -14.046358108520508
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -6.166818618774414,
"scores": {
"A": -4.778277397155762,
"B": -9.417325019836426,
"C": -10.945096015930176,
"D": -11.501741409301758,
"E": -13.226816177368164
}
}
},
{
"ex_id": "aqua-test-103",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.1493282318115234,
"scores": {
"A": -9.748441696166992,
"B": -8.529296875,
"C": -9.693557739257812,
"D": -11.449222564697266,
"E": -9.678625106811523
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.7451763153076172,
"scores": {
"A": -7.868520736694336,
"B": -8.613697052001953,
"C": -10.544960975646973,
"D": -9.806873321533203,
"E": -8.439764022827148
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 1.2809290885925293,
"scores": {
"A": -8.969255447387695,
"B": -7.4534783363342285,
"C": -8.759740829467773,
"D": -10.289947509765625,
"E": -8.734407424926758
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 1.1493244171142578,
"scores": {
"A": -9.748445510864258,
"B": -8.529302597045898,
"C": -9.693565368652344,
"D": -11.449226379394531,
"E": -9.678627014160156
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 1.1493244171142578,
"scores": {
"A": -9.748445510864258,
"B": -8.529302597045898,
"C": -9.693565368652344,
"D": -11.449226379394531,
"E": -9.678627014160156
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "E",
"correct": false,
"margin": -0.2049875259399414,
"scores": {
"A": -5.457864761352539,
"B": -5.467221260070801,
"C": -8.277172088623047,
"D": -7.82890510559082,
"E": -5.262233734130859
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 0.9715185165405273,
"scores": {
"A": -8.980949401855469,
"B": -7.786107063293457,
"C": -9.11854076385498,
"D": -10.403557777404785,
"E": -8.757625579833984
}
},
"control_shared_randvec": {
"pred_label": "E",
"correct": false,
"margin": -1.1303739547729492,
"scores": {
"A": -7.216065406799316,
"B": -7.767756462097168,
"C": -9.236971855163574,
"D": -8.464241981506348,
"E": -6.637382507324219
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -0.7451763153076172,
"scores": {
"A": -7.8685197830200195,
"B": -8.613696098327637,
"C": -10.544960021972656,
"D": -9.806873321533203,
"E": -8.439760208129883
}
}
},
{
"ex_id": "aqua-test-105",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 3.0392799377441406,
"scores": {
"A": -11.515534400939941,
"B": -12.032148361206055,
"C": -8.4762544631958,
"D": -13.967401504516602,
"E": -13.267354011535645
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.133573055267334,
"scores": {
"A": -7.463276386260986,
"B": -9.80911636352539,
"C": -8.59684944152832,
"D": -13.382390975952148,
"E": -13.58960247039795
}
},
"patched_0": {
"pred_label": "C",
"correct": true,
"margin": 2.2149181365966797,
"scores": {
"A": -10.347264289855957,
"B": -10.250322341918945,
"C": -8.035404205322266,
"D": -12.775790214538574,
"E": -12.733001708984375
}
},
"patched_01": {
"pred_label": "C",
"correct": true,
"margin": 3.039278030395508,
"scores": {
"A": -11.515533447265625,
"B": -12.032148361206055,
"C": -8.476255416870117,
"D": -13.967406272888184,
"E": -13.267354965209961
}
},
"patched_full": {
"pred_label": "C",
"correct": true,
"margin": 3.039278030395508,
"scores": {
"A": -11.515533447265625,
"B": -12.032148361206055,
"C": -8.476255416870117,
"D": -13.967406272888184,
"E": -13.267354965209961
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -1.6724023818969727,
"scores": {
"A": -5.971221446990967,
"B": -7.549536228179932,
"C": -7.6436238288879395,
"D": -11.26807689666748,
"E": -11.341324806213379
}
},
"control_time_shuffled": {
"pred_label": "C",
"correct": true,
"margin": 2.1505050659179688,
"scores": {
"A": -10.598121643066406,
"B": -10.289155960083008,
"C": -8.138650894165039,
"D": -12.586099624633789,
"E": -12.69243049621582
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -1.6481122970581055,
"scores": {
"A": -6.949349403381348,
"B": -8.987531661987305,
"C": -8.597461700439453,
"D": -10.919259071350098,
"E": -11.619161605834961
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -1.1335716247558594,
"scores": {
"A": -7.4632720947265625,
"B": -9.809111595153809,
"C": -8.596843719482422,
"D": -13.382383346557617,
"E": -13.589597702026367
}
}
},
{
"ex_id": "aqua-test-111",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.18841552734375,
"scores": {
"A": -9.808207511901855,
"B": -9.283623695373535,
"C": -9.472039222717285,
"D": -10.7572660446167,
"E": -11.43770980834961
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.6287879943847656,
"scores": {
"A": -8.439443588256836,
"B": -12.068231582641602,
"C": -12.49129867553711,
"D": -13.331933975219727,
"E": -14.553701400756836
}
},
"patched_0": {
"pred_label": "C",
"correct": false,
"margin": -0.03671741485595703,
"scores": {
"A": -9.123326301574707,
"B": -8.150065422058105,
"C": -8.113348007202148,
"D": -9.43490219116211,
"E": -9.247275352478027
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 0.1884136199951172,
"scores": {
"A": -9.808208465576172,
"B": -9.283626556396484,
"C": -9.472040176391602,
"D": -10.757265090942383,
"E": -11.43770980834961
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 0.1884136199951172,
"scores": {
"A": -9.808208465576172,
"B": -9.283626556396484,
"C": -9.472040176391602,
"D": -10.757265090942383,
"E": -11.43770980834961
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -2.1629467010498047,
"scores": {
"A": -8.84734058380127,
"B": -11.010287284851074,
"C": -11.342278480529785,
"D": -11.88167953491211,
"E": -12.936474800109863
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 0.01021575927734375,
"scores": {
"A": -8.867430686950684,
"B": -7.981822967529297,
"C": -7.992038726806641,
"D": -9.280956268310547,
"E": -9.115897178649902
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -2.627413272857666,
"scores": {
"A": -7.264729022979736,
"B": -9.892142295837402,
"C": -9.825416564941406,
"D": -10.610671997070312,
"E": -11.448965072631836
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -3.628787040710449,
"scores": {
"A": -8.43944263458252,
"B": -12.068229675292969,
"C": -12.491294860839844,
"D": -13.331932067871094,
"E": -14.55370044708252
}
}
},
{
"ex_id": "aqua-test-116",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.7076244354248047,
"scores": {
"A": -12.037410736083984,
"B": -9.75387191772461,
"C": -11.461496353149414,
"D": -11.536352157592773,
"E": -11.817276000976562
}
},
"ablated": {
"pred_label": "D",
"correct": false,
"margin": -0.5289134979248047,
"scores": {
"A": -7.282122611999512,
"B": -7.493680000305176,
"C": -8.805983543395996,
"D": -6.964766502380371,
"E": -7.28157901763916
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 1.3713750839233398,
"scores": {
"A": -8.353381156921387,
"B": -6.982006072998047,
"C": -8.779082298278809,
"D": -8.85804271697998,
"E": -9.427443504333496
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 1.707632064819336,
"scores": {
"A": -12.037415504455566,
"B": -9.753875732421875,
"C": -11.461507797241211,
"D": -11.536357879638672,
"E": -11.817279815673828
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 1.707632064819336,
"scores": {
"A": -12.037415504455566,
"B": -9.753875732421875,
"C": -11.461507797241211,
"D": -11.536357879638672,
"E": -11.817279815673828
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "E",
"correct": false,
"margin": -0.8397531509399414,
"scores": {
"A": -7.520603656768799,
"B": -6.9838643074035645,
"C": -9.986032485961914,
"D": -7.869058132171631,
"E": -6.144111156463623
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 1.511284351348877,
"scores": {
"A": -8.105411529541016,
"B": -6.594127178192139,
"C": -8.44879150390625,
"D": -8.556886672973633,
"E": -9.028934478759766
}
},
"control_shared_randvec": {
"pred_label": "B",
"correct": true,
"margin": 0.8564348220825195,
"scores": {
"A": -7.948397636413574,
"B": -7.091962814331055,
"C": -10.485963821411133,
"D": -10.730182647705078,
"E": -8.363138198852539
}
},
"control_patch_nonshared": {
"pred_label": "D",
"correct": false,
"margin": -0.5289154052734375,
"scores": {
"A": -7.282122611999512,
"B": -7.493679046630859,
"C": -8.80598258972168,
"D": -6.964763641357422,
"E": -7.281576156616211
}
}
},
{
"ex_id": "aqua-test-120",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.3595123291015625,
"scores": {
"A": -12.646347045898438,
"B": -10.183612823486328,
"C": -10.54312515258789,
"D": -11.979488372802734,
"E": -12.640970230102539
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.8485918045043945,
"scores": {
"A": -7.463525772094727,
"B": -12.312117576599121,
"C": -11.753535270690918,
"D": -12.008286476135254,
"E": -13.767097473144531
}
},
"patched_0": {
"pred_label": "C",
"correct": false,
"margin": -0.4900999069213867,
"scores": {
"A": -12.293493270874023,
"B": -9.351531982421875,
"C": -8.861432075500488,
"D": -10.692264556884766,
"E": -10.770162582397461
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 0.3595142364501953,
"scores": {
"A": -12.646347045898438,
"B": -10.183609008789062,
"C": -10.543123245239258,
"D": -11.979488372802734,
"E": -12.640968322753906
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 0.3595142364501953,
"scores": {
"A": -12.646347045898438,
"B": -10.183609008789062,
"C": -10.543123245239258,
"D": -11.979488372802734,
"E": -12.640968322753906
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -4.472604751586914,
"scores": {
"A": -9.625394821166992,
"B": -14.097999572753906,
"C": -14.751794815063477,
"D": -14.831222534179688,
"E": -15.224090576171875
}
},
"control_time_shuffled": {
"pred_label": "C",
"correct": false,
"margin": -0.44132232666015625,
"scores": {
"A": -12.334344863891602,
"B": -9.446332931518555,
"C": -9.005010604858398,
"D": -10.665205001831055,
"E": -10.99666976928711
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -3.0735368728637695,
"scores": {
"A": -5.083780288696289,
"B": -8.157317161560059,
"C": -8.276480674743652,
"D": -9.385171890258789,
"E": -10.254486083984375
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -4.848588466644287,
"scores": {
"A": -7.463529109954834,
"B": -12.312117576599121,
"C": -11.753534317016602,
"D": -12.008285522460938,
"E": -13.767098426818848
}
}
},
{
"ex_id": "aqua-test-122",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.6598358154296875,
"scores": {
"A": -11.076019287109375,
"B": -10.416183471679688,
"C": -13.238750457763672,
"D": -13.289159774780273,
"E": -13.489381790161133
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.3248538970947266,
"scores": {
"A": -6.911991119384766,
"B": -9.236845016479492,
"C": -12.405698776245117,
"D": -10.99496078491211,
"E": -12.164006233215332
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 0.3234901428222656,
"scores": {
"A": -10.238473892211914,
"B": -9.914983749389648,
"C": -12.03645133972168,
"D": -12.105175018310547,
"E": -13.415177345275879
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 0.6598358154296875,
"scores": {
"A": -11.076021194458008,
"B": -10.41618537902832,
"C": -13.23875617980957,
"D": -13.289161682128906,
"E": -13.489385604858398
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 0.6598358154296875,
"scores": {
"A": -11.076021194458008,
"B": -10.41618537902832,
"C": -13.23875617980957,
"D": -13.289161682128906,
"E": -13.489385604858398
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -2.418147087097168,
"scores": {
"A": -5.337198257446289,
"B": -7.755345344543457,
"C": -12.551668167114258,
"D": -11.829740524291992,
"E": -12.763933181762695
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 0.2971000671386719,
"scores": {
"A": -10.380821228027344,
"B": -10.083721160888672,
"C": -12.148942947387695,
"D": -12.248394966125488,
"E": -13.505158424377441
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -3.0811538696289062,
"scores": {
"A": -4.878448963165283,
"B": -7.9596028327941895,
"C": -11.607043266296387,
"D": -9.50536823272705,
"E": -11.090916633605957
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -2.3248538970947266,
"scores": {
"A": -6.911995887756348,
"B": -9.236849784851074,
"C": -12.405706405639648,
"D": -10.994964599609375,
"E": -12.164009094238281
}
}
},
{
"ex_id": "aqua-test-123",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 1.928288459777832,
"scores": {
"A": -12.817946434020996,
"B": -13.251622200012207,
"C": -10.08199405670166,
"D": -12.010282516479492,
"E": -12.828923225402832
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.036839485168457,
"scores": {
"A": -7.727773666381836,
"B": -10.925074577331543,
"C": -11.764613151550293,
"D": -11.528144836425781,
"E": -13.928091049194336
}
},
"patched_0": {
"pred_label": "C",
"correct": true,
"margin": 0.1131134033203125,
"scores": {
"A": -11.61458969116211,
"B": -11.275108337402344,
"C": -11.161994934082031,
"D": -11.986404418945312,
"E": -12.851778030395508
}
},
"patched_01": {
"pred_label": "C",
"correct": true,
"margin": 1.9282875061035156,
"scores": {
"A": -12.817957878112793,
"B": -13.251638412475586,
"C": -10.082003593444824,
"D": -12.01029109954834,
"E": -12.82893180847168
}
},
"patched_full": {
"pred_label": "C",
"correct": true,
"margin": 1.9282875061035156,
"scores": {
"A": -12.817957878112793,
"B": -13.251638412475586,
"C": -10.082003593444824,
"D": -12.01029109954834,
"E": -12.82893180847168
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -4.267435073852539,
"scores": {
"A": -8.1873779296875,
"B": -10.754793167114258,
"C": -12.454813003540039,
"D": -13.088325500488281,
"E": -14.197637557983398
}
},
"control_time_shuffled": {
"pred_label": "C",
"correct": true,
"margin": 0.0443572998046875,
"scores": {
"A": -11.579950332641602,
"B": -11.245656967163086,
"C": -11.201299667358398,
"D": -12.026390075683594,
"E": -12.818794250488281
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -5.751577854156494,
"scores": {
"A": -4.197388172149658,
"B": -8.354893684387207,
"C": -9.948966026306152,
"D": -9.155034065246582,
"E": -10.130256652832031
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -4.036839485168457,
"scores": {
"A": -7.7277727127075195,
"B": -10.925077438354492,
"C": -11.764612197875977,
"D": -11.528146743774414,
"E": -13.928092002868652
}
}
},
{
"ex_id": "aqua-test-125",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.26287078857421875,
"scores": {
"A": -12.876455307006836,
"B": -12.006429672241211,
"C": -10.34354305267334,
"D": -10.606413841247559,
"E": -11.505398750305176
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.1885986328125,
"scores": {
"A": -6.909121513366699,
"B": -8.705928802490234,
"C": -10.0977201461792,
"D": -9.862305641174316,
"E": -10.177146911621094
}
},
"patched_0": {
"pred_label": "B",
"correct": false,
"margin": -0.24047374725341797,
"scores": {
"A": -9.58531379699707,
"B": -8.929466247558594,
"C": -9.169939994812012,
"D": -9.5785493850708,
"E": -10.480676651000977
}
},
"patched_01": {
"pred_label": "C",
"correct": true,
"margin": 0.26287078857421875,
"scores": {
"A": -12.876452445983887,
"B": -12.006429672241211,
"C": -10.34354305267334,
"D": -10.606413841247559,
"E": -11.50539779663086
}
},
"patched_full": {
"pred_label": "C",
"correct": true,
"margin": 0.26287078857421875,
"scores": {
"A": -12.876452445983887,
"B": -12.006429672241211,
"C": -10.34354305267334,
"D": -10.606413841247559,
"E": -11.50539779663086
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -3.153430461883545,
"scores": {
"A": -7.0063958168029785,
"B": -9.034759521484375,
"C": -10.159826278686523,
"D": -11.367905616760254,
"E": -11.336196899414062
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": false,
"margin": -0.06798362731933594,
"scores": {
"A": -9.642061233520508,
"B": -8.649129867553711,
"C": -8.717113494873047,
"D": -9.228824615478516,
"E": -10.170103073120117
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -2.090330123901367,
"scores": {
"A": -7.855500221252441,
"B": -8.707620620727539,
"C": -9.945830345153809,
"D": -13.191256523132324,
"E": -12.100184440612793
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -3.1885976791381836,
"scores": {
"A": -6.909127235412598,
"B": -8.7059326171875,
"C": -10.097724914550781,
"D": -9.862310409545898,
"E": -10.177148818969727
}
}
},
{
"ex_id": "aqua-test-130",
"gold": "D",
"baseline": {
"pred_label": "D",
"correct": true,
"margin": 0.5736770629882812,
"scores": {
"A": -12.52768611907959,
"B": -11.624752044677734,
"C": -14.400633811950684,
"D": -11.051074981689453,
"E": -12.196588516235352
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.6399145126342773,
"scores": {
"A": -7.714714050292969,
"B": -7.738489151000977,
"C": -10.441914558410645,
"D": -8.354628562927246,
"E": -8.231303215026855
}
},
"patched_0": {
"pred_label": "B",
"correct": false,
"margin": -0.8715057373046875,
"scores": {
"A": -10.206192016601562,
"B": -8.503612518310547,
"C": -9.418848037719727,
"D": -9.375118255615234,
"E": -9.415210723876953
}
},
"patched_01": {
"pred_label": "D",
"correct": true,
"margin": 0.5736827850341797,
"scores": {
"A": -12.527690887451172,
"B": -11.624759674072266,
"C": -14.400640487670898,
"D": -11.051076889038086,
"E": -12.196598052978516
}
},
"patched_full": {
"pred_label": "D",
"correct": true,
"margin": 0.5736827850341797,
"scores": {
"A": -12.527690887451172,
"B": -11.624759674072266,
"C": -14.400640487670898,
"D": -11.051076889038086,
"E": -12.196598052978516
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "B",
"correct": false,
"margin": -1.5013294219970703,
"scores": {
"A": -5.733486652374268,
"B": -5.340871334075928,
"C": -8.164079666137695,
"D": -6.842200756072998,
"E": -6.404123783111572
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": false,
"margin": -0.8385581970214844,
"scores": {
"A": -10.177495002746582,
"B": -8.628342628479004,
"C": -9.545053482055664,
"D": -9.466900825500488,
"E": -9.466954231262207
}
},
"control_shared_randvec": {
"pred_label": "B",
"correct": false,
"margin": -1.8849029541015625,
"scores": {
"A": -7.422325134277344,
"B": -7.01991081237793,
"C": -9.694180488586426,
"D": -8.904813766479492,
"E": -8.477489471435547
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -0.6399135589599609,
"scores": {
"A": -7.714714050292969,
"B": -7.738491058349609,
"C": -10.441915512084961,
"D": -8.35462760925293,
"E": -8.231302261352539
}
}
},
{
"ex_id": "aqua-test-140",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.090902328491211,
"scores": {
"A": -12.17054557800293,
"B": -10.950679779052734,
"C": -12.478940963745117,
"D": -12.041582107543945,
"E": -12.825494766235352
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.0806522369384766,
"scores": {
"A": -8.631109237670898,
"B": -9.711761474609375,
"C": -10.810302734375,
"D": -10.214776992797852,
"E": -11.603350639343262
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 1.1358776092529297,
"scores": {
"A": -9.581466674804688,
"B": -8.445589065551758,
"C": -9.826594352722168,
"D": -10.325157165527344,
"E": -11.036417961120605
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 1.0909004211425781,
"scores": {
"A": -12.17054557800293,
"B": -10.950679779052734,
"C": -12.478940963745117,
"D": -12.041580200195312,
"E": -12.825498580932617
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 1.0909004211425781,
"scores": {
"A": -12.17054557800293,
"B": -10.950679779052734,
"C": -12.478940963745117,
"D": -12.041580200195312,
"E": -12.825498580932617
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -1.6461381912231445,
"scores": {
"A": -8.256159782409668,
"B": -9.902297973632812,
"C": -11.741201400756836,
"D": -11.071731567382812,
"E": -12.105072975158691
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 1.1649103164672852,
"scores": {
"A": -9.316244125366211,
"B": -8.151333808898926,
"C": -9.497949600219727,
"D": -10.047868728637695,
"E": -10.842238426208496
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -1.3181371688842773,
"scores": {
"A": -7.826546669006348,
"B": -9.144683837890625,
"C": -11.221202850341797,
"D": -10.469319343566895,
"E": -11.426633834838867
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -1.0806512832641602,
"scores": {
"A": -8.631110191345215,
"B": -9.711761474609375,
"C": -10.810298919677734,
"D": -10.214774131774902,
"E": -11.603349685668945
}
}
},
{
"ex_id": "aqua-test-141",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.3972196578979492,
"scores": {
"A": -15.668845176696777,
"B": -14.022212028503418,
"C": -12.345376968383789,
"D": -12.742596626281738,
"E": -13.434144973754883
}
},
"ablated": {
"pred_label": "E",
"correct": false,
"margin": -1.4827747344970703,
"scores": {
"A": -9.032247543334961,
"B": -10.177014350891113,
"C": -9.580657005310059,
"D": -8.116410255432129,
"E": -8.097882270812988
}
},
"patched_0": {
"pred_label": "C",
"correct": true,
"margin": 0.9053430557250977,
"scores": {
"A": -12.772378921508789,
"B": -10.580516815185547,
"C": -8.90491771697998,
"D": -9.810260772705078,
"E": -9.986860275268555
}
},
"patched_01": {
"pred_label": "C",
"correct": true,
"margin": 0.3972187042236328,
"scores": {
"A": -15.668844223022461,
"B": -14.022214889526367,
"C": -12.345376968383789,
"D": -12.742595672607422,
"E": -13.43414306640625
}
},
"patched_full": {
"pred_label": "C",
"correct": true,
"margin": 0.3972187042236328,
"scores": {
"A": -15.668844223022461,
"B": -14.022214889526367,
"C": -12.345376968383789,
"D": -12.742595672607422,
"E": -13.43414306640625
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "D",
"correct": false,
"margin": -0.9576740264892578,
"scores": {
"A": -8.790735244750977,
"B": -9.536684036254883,
"C": -9.24622917175293,
"D": -8.288555145263672,
"E": -8.68001937866211
}
},
"control_time_shuffled": {
"pred_label": "C",
"correct": true,
"margin": 0.9014320373535156,
"scores": {
"A": -12.741355895996094,
"B": -10.564189910888672,
"C": -8.901117324829102,
"D": -9.802549362182617,
"E": -9.944976806640625
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -2.398233413696289,
"scores": {
"A": -8.182514190673828,
"B": -11.277816772460938,
"C": -10.580747604370117,
"D": -9.102690696716309,
"E": -9.042513847351074
}
},
"control_patch_nonshared": {
"pred_label": "E",
"correct": false,
"margin": -1.4827728271484375,
"scores": {
"A": -9.032241821289062,
"B": -10.177009582519531,
"C": -9.580652236938477,
"D": -8.116405487060547,
"E": -8.097879409790039
}
}
},
{
"ex_id": "aqua-test-148",
"gold": "D",
"baseline": {
"pred_label": "D",
"correct": true,
"margin": 0.09283638000488281,
"scores": {
"A": -11.842838287353516,
"B": -8.686580657958984,
"C": -9.391075134277344,
"D": -8.593744277954102,
"E": -10.327585220336914
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.2917442321777344,
"scores": {
"A": -8.228094100952148,
"B": -9.228675842285156,
"C": -9.42142105102539,
"D": -8.519838333129883,
"E": -9.596782684326172
}
},
"patched_0": {
"pred_label": "B",
"correct": false,
"margin": -1.0341577529907227,
"scores": {
"A": -9.260769844055176,
"B": -6.733394622802734,
"C": -7.005693435668945,
"D": -7.767552375793457,
"E": -8.883651733398438
}
},
"patched_01": {
"pred_label": "D",
"correct": true,
"margin": 0.09284019470214844,
"scores": {
"A": -11.842844009399414,
"B": -8.686589241027832,
"C": -9.391081809997559,
"D": -8.593749046325684,
"E": -10.327591896057129
}
},
"patched_full": {
"pred_label": "D",
"correct": true,
"margin": 0.09284019470214844,
"scores": {
"A": -11.842844009399414,
"B": -8.686589241027832,
"C": -9.391081809997559,
"D": -8.593749046325684,
"E": -10.327591896057129
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -0.08560562133789062,
"scores": {
"A": -6.6732988357543945,
"B": -7.1317548751831055,
"C": -7.866713523864746,
"D": -6.758904457092285,
"E": -8.30843448638916
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": false,
"margin": -0.9941120147705078,
"scores": {
"A": -9.38866901397705,
"B": -6.864222526550293,
"C": -7.074477195739746,
"D": -7.858334541320801,
"E": -8.94621753692627
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -0.6664242744445801,
"scores": {
"A": -7.582589626312256,
"B": -8.015180587768555,
"C": -9.47393798828125,
"D": -8.249013900756836,
"E": -8.513860702514648
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -0.2917442321777344,
"scores": {
"A": -8.228096008300781,
"B": -9.228679656982422,
"C": -9.42142105102539,
"D": -8.519840240478516,
"E": -9.596784591674805
}
}
},
{
"ex_id": "aqua-test-152",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.6472129821777344,
"scores": {
"A": -12.141305923461914,
"B": -11.08128833770752,
"C": -11.728501319885254,
"D": -11.744885444641113,
"E": -11.734070777893066
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.606741905212402,
"scores": {
"A": -8.290619850158691,
"B": -12.897361755371094,
"C": -16.176721572875977,
"D": -13.130666732788086,
"E": -13.918773651123047
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 0.7464790344238281,
"scores": {
"A": -10.325726509094238,
"B": -9.529410362243652,
"C": -10.725006103515625,
"D": -10.360553741455078,
"E": -10.27588939666748
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 0.6472129821777344,
"scores": {
"A": -12.141304969787598,
"B": -11.081286430358887,
"C": -11.728499412536621,
"D": -11.744885444641113,
"E": -11.7340726852417
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 0.6472129821777344,
"scores": {
"A": -12.141304969787598,
"B": -11.081286430358887,
"C": -11.728499412536621,
"D": -11.744885444641113,
"E": -11.7340726852417
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -3.180154800415039,
"scores": {
"A": -8.851917266845703,
"B": -12.032072067260742,
"C": -15.113250732421875,
"D": -13.466560363769531,
"E": -12.649953842163086
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 0.4894752502441406,
"scores": {
"A": -10.067070007324219,
"B": -9.577594757080078,
"C": -10.798778533935547,
"D": -10.269950866699219,
"E": -10.165655136108398
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -3.321539878845215,
"scores": {
"A": -8.855324745178223,
"B": -12.176864624023438,
"C": -15.197938919067383,
"D": -12.816364288330078,
"E": -12.050538063049316
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -4.606740951538086,
"scores": {
"A": -8.29061508178711,
"B": -12.897356033325195,
"C": -16.176713943481445,
"D": -13.13066291809082,
"E": -13.918767929077148
}
}
},
{
"ex_id": "aqua-test-167",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 2.286722183227539,
"scores": {
"A": -13.319049835205078,
"B": -10.63465690612793,
"C": -12.921379089355469,
"D": -16.10821533203125,
"E": -14.74123764038086
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.3104257583618164,
"scores": {
"A": -8.05471420288086,
"B": -11.365139961242676,
"C": -15.134896278381348,
"D": -13.336740493774414,
"E": -14.394715309143066
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 1.7309093475341797,
"scores": {
"A": -11.87700366973877,
"B": -10.14609432220459,
"C": -11.895035743713379,
"D": -13.453927040100098,
"E": -13.269637107849121
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 2.286722183227539,
"scores": {
"A": -13.319046020507812,
"B": -10.634654998779297,
"C": -12.921377182006836,
"D": -16.10821533203125,
"E": -14.74123764038086
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 2.286722183227539,
"scores": {
"A": -13.319046020507812,
"B": -10.634654998779297,
"C": -12.921377182006836,
"D": -16.10821533203125,
"E": -14.74123764038086
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -2.7075023651123047,
"scores": {
"A": -8.526466369628906,
"B": -11.233968734741211,
"C": -16.785362243652344,
"D": -15.479930877685547,
"E": -15.917808532714844
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 1.8152027130126953,
"scores": {
"A": -12.013884544372559,
"B": -10.008508682250977,
"C": -11.823711395263672,
"D": -13.332377433776855,
"E": -13.145186424255371
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -3.2307591438293457,
"scores": {
"A": -7.303309917449951,
"B": -10.534069061279297,
"C": -14.878864288330078,
"D": -14.44310188293457,
"E": -14.893917083740234
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -3.3104257583618164,
"scores": {
"A": -8.054710388183594,
"B": -11.36513614654541,
"C": -15.13489055633545,
"D": -13.336731910705566,
"E": -14.394709587097168
}
}
},
{
"ex_id": "aqua-test-178",
"gold": "E",
"baseline": {
"pred_label": "E",
"correct": true,
"margin": 0.44650745391845703,
"scores": {
"A": -13.206219673156738,
"B": -11.094629287719727,
"C": -12.79085922241211,
"D": -12.61279582977295,
"E": -10.64812183380127
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.734641075134277,
"scores": {
"A": -5.792222023010254,
"B": -10.40644359588623,
"C": -8.512224197387695,
"D": -10.881692886352539,
"E": -12.526863098144531
}
},
"patched_0": {
"pred_label": "B",
"correct": false,
"margin": -2.943204402923584,
"scores": {
"A": -9.805769920349121,
"B": -5.3867316246032715,
"C": -6.29595947265625,
"D": -9.170482635498047,
"E": -8.329936027526855
}
},
"patched_01": {
"pred_label": "E",
"correct": true,
"margin": 0.4465036392211914,
"scores": {
"A": -13.206205368041992,
"B": -11.094612121582031,
"C": -12.790840148925781,
"D": -12.612784385681152,
"E": -10.64810848236084
}
},
"patched_full": {
"pred_label": "E",
"correct": true,
"margin": 0.4465036392211914,
"scores": {
"A": -13.206205368041992,
"B": -11.094612121582031,
"C": -12.790840148925781,
"D": -12.612784385681152,
"E": -10.64810848236084
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -4.96022367477417,
"scores": {
"A": -5.559600353240967,
"B": -9.4379243850708,
"C": -8.867037773132324,
"D": -11.44845199584961,
"E": -10.519824028015137
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": false,
"margin": -3.127613067626953,
"scores": {
"A": -9.935117721557617,
"B": -5.414183616638184,
"C": -6.386631965637207,
"D": -9.185277938842773,
"E": -8.541796684265137
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -6.373246669769287,
"scores": {
"A": -6.259435176849365,
"B": -10.628332138061523,
"C": -10.076188087463379,
"D": -13.082308769226074,
"E": -12.632681846618652
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -6.734642028808594,
"scores": {
"A": -5.792219161987305,
"B": -10.406440734863281,
"C": -8.512224197387695,
"D": -10.881689071655273,
"E": -12.526861190795898
}
}
},
{
"ex_id": "aqua-test-181",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.7682161331176758,
"scores": {
"A": -9.629287719726562,
"B": -8.861071586608887,
"C": -11.832342147827148,
"D": -11.63463020324707,
"E": -10.680866241455078
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.5154037475585938,
"scores": {
"A": -7.565939903259277,
"B": -9.081343650817871,
"C": -10.455299377441406,
"D": -9.157304763793945,
"E": -9.032361030578613
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 1.180495262145996,
"scores": {
"A": -9.739990234375,
"B": -8.418399810791016,
"C": -9.598895072937012,
"D": -10.474803924560547,
"E": -9.851222038269043
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 0.7682132720947266,
"scores": {
"A": -9.629287719726562,
"B": -8.861074447631836,
"C": -11.832342147827148,
"D": -11.634628295898438,
"E": -10.680864334106445
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 0.7682132720947266,
"scores": {
"A": -9.629287719726562,
"B": -8.861074447631836,
"C": -11.832342147827148,
"D": -11.634628295898438,
"E": -10.680864334106445
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -0.6810026168823242,
"scores": {
"A": -4.672728061676025,
"B": -5.35373067855835,
"C": -7.516335964202881,
"D": -6.342844486236572,
"E": -4.79114294052124
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 1.1485824584960938,
"scores": {
"A": -9.888175964355469,
"B": -8.739593505859375,
"C": -9.970766067504883,
"D": -10.718721389770508,
"E": -10.002328872680664
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -0.9188671112060547,
"scores": {
"A": -9.10055160522461,
"B": -10.019418716430664,
"C": -11.787355422973633,
"D": -10.639719009399414,
"E": -9.353466033935547
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -1.515404224395752,
"scores": {
"A": -7.565932750701904,
"B": -9.081336975097656,
"C": -10.455291748046875,
"D": -9.157295227050781,
"E": -9.032354354858398
}
}
},
{
"ex_id": "aqua-test-183",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.4096593856811523,
"scores": {
"A": -12.662055969238281,
"B": -9.372528076171875,
"C": -10.782187461853027,
"D": -13.160992622375488,
"E": -13.141705513000488
}
},
"ablated": {
"pred_label": "C",
"correct": false,
"margin": -0.7217111587524414,
"scores": {
"A": -8.667959213256836,
"B": -7.500253200531006,
"C": -6.7785420417785645,
"D": -9.29892349243164,
"E": -10.76202392578125
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 0.6202316284179688,
"scores": {
"A": -10.945490837097168,
"B": -7.309451103210449,
"C": -7.929682731628418,
"D": -10.611489295959473,
"E": -11.70968246459961
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 1.409658432006836,
"scores": {
"A": -12.662059783935547,
"B": -9.372528076171875,
"C": -10.782186508178711,
"D": -13.160991668701172,
"E": -13.141706466674805
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 1.409658432006836,
"scores": {
"A": -12.662059783935547,
"B": -9.372528076171875,
"C": -10.782186508178711,
"D": -13.160991668701172,
"E": -13.141706466674805
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "C",
"correct": false,
"margin": -0.8798198699951172,
"scores": {
"A": -10.624456405639648,
"B": -9.079212188720703,
"C": -8.199392318725586,
"D": -13.810981750488281,
"E": -14.749567031860352
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 0.6436195373535156,
"scores": {
"A": -10.855729103088379,
"B": -7.0558881759643555,
"C": -7.699507713317871,
"D": -10.375727653503418,
"E": -11.54419231414795
}
},
"control_shared_randvec": {
"pred_label": "C",
"correct": false,
"margin": -0.7752676010131836,
"scores": {
"A": -7.476681232452393,
"B": -7.795263767242432,
"C": -7.019996166229248,
"D": -9.245484352111816,
"E": -9.37934684753418
}
},
"control_patch_nonshared": {
"pred_label": "C",
"correct": false,
"margin": -0.7217121124267578,
"scores": {
"A": -8.667959213256836,
"B": -7.500255584716797,
"C": -6.778543472290039,
"D": -9.298927307128906,
"E": -10.762025833129883
}
}
},
{
"ex_id": "aqua-test-189",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.20896244049072266,
"scores": {
"A": -14.54034423828125,
"B": -11.554760932922363,
"C": -11.811978340148926,
"D": -11.763723373413086,
"E": -13.348597526550293
}
},
"ablated": {
"pred_label": "D",
"correct": false,
"margin": -0.8642768859863281,
"scores": {
"A": -9.882810592651367,
"B": -10.419057846069336,
"C": -10.307378768920898,
"D": -9.554780960083008,
"E": -9.593378067016602
}
},
"patched_0": {
"pred_label": "C",
"correct": false,
"margin": -0.7312335968017578,
"scores": {
"A": -11.619770050048828,
"B": -9.795265197753906,
"C": -9.064031600952148,
"D": -9.905113220214844,
"E": -9.719362258911133
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 0.20896339416503906,
"scores": {
"A": -14.54034423828125,
"B": -11.554759979248047,
"C": -11.811981201171875,
"D": -11.763723373413086,
"E": -13.348596572875977
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 0.20896339416503906,
"scores": {
"A": -14.54034423828125,
"B": -11.554759979248047,
"C": -11.811981201171875,
"D": -11.763723373413086,
"E": -13.348596572875977
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "D",
"correct": false,
"margin": -0.6974210739135742,
"scores": {
"A": -9.276986122131348,
"B": -9.103320121765137,
"C": -10.14097785949707,
"D": -8.405899047851562,
"E": -8.49357795715332
}
},
"control_time_shuffled": {
"pred_label": "C",
"correct": false,
"margin": -0.7965354919433594,
"scores": {
"A": -11.578865051269531,
"B": -9.813770294189453,
"C": -9.017234802246094,
"D": -9.923456192016602,
"E": -9.655179977416992
}
},
"control_shared_randvec": {
"pred_label": "E",
"correct": false,
"margin": -0.7022542953491211,
"scores": {
"A": -6.854315757751465,
"B": -6.292705535888672,
"C": -6.938782215118408,
"D": -5.983695030212402,
"E": -5.590451240539551
}
},
"control_patch_nonshared": {
"pred_label": "D",
"correct": false,
"margin": -0.8642749786376953,
"scores": {
"A": -9.88280963897705,
"B": -10.419052124023438,
"C": -10.30737590789795,
"D": -9.554777145385742,
"E": -9.59337329864502
}
}
},
{
"ex_id": "aqua-test-190",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.35360145568847656,
"scores": {
"A": -13.596860885620117,
"B": -10.771349906921387,
"C": -10.41774845123291,
"D": -13.349145889282227,
"E": -13.912391662597656
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.489037036895752,
"scores": {
"A": -6.3273138999938965,
"B": -9.247300148010254,
"C": -12.816350936889648,
"D": -10.787364959716797,
"E": -12.917289733886719
}
},
"patched_0": {
"pred_label": "C",
"correct": true,
"margin": 0.29494571685791016,
"scores": {
"A": -10.62340259552002,
"B": -9.846135139465332,
"C": -9.551189422607422,
"D": -10.956984519958496,
"E": -11.488529205322266
}
},
"patched_01": {
"pred_label": "C",
"correct": true,
"margin": 0.35360145568847656,
"scores": {
"A": -13.59686279296875,
"B": -10.771347999572754,
"C": -10.417746543884277,
"D": -13.34914493560791,
"E": -13.912391662597656
}
},
"patched_full": {
"pred_label": "C",
"correct": true,
"margin": 0.35360145568847656,
"scores": {
"A": -13.59686279296875,
"B": -10.771347999572754,
"C": -10.417746543884277,
"D": -13.34914493560791,
"E": -13.912391662597656
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -6.209604263305664,
"scores": {
"A": -7.986570358276367,
"B": -10.776678085327148,
"C": -14.196174621582031,
"D": -12.462160110473633,
"E": -14.763839721679688
}
},
"control_time_shuffled": {
"pred_label": "C",
"correct": true,
"margin": 0.43008899688720703,
"scores": {
"A": -10.530719757080078,
"B": -10.036431312561035,
"C": -9.606342315673828,
"D": -10.94388198852539,
"E": -11.484930038452148
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -4.7598161697387695,
"scores": {
"A": -8.101582527160645,
"B": -8.396411895751953,
"C": -12.861398696899414,
"D": -12.736745834350586,
"E": -13.218839645385742
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -6.489035606384277,
"scores": {
"A": -6.327314376831055,
"B": -9.247294425964355,
"C": -12.816349983215332,
"D": -10.787363052368164,
"E": -12.917287826538086
}
}
},
{
"ex_id": "aqua-test-191",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.9574899673461914,
"scores": {
"A": -11.895600318908691,
"B": -10.9381103515625,
"C": -13.633337020874023,
"D": -14.099964141845703,
"E": -13.749225616455078
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.093125343322754,
"scores": {
"A": -6.434209823608398,
"B": -8.527335166931152,
"C": -11.775838851928711,
"D": -11.290367126464844,
"E": -12.324054718017578
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 1.0822925567626953,
"scores": {
"A": -8.277904510498047,
"B": -7.195611953735352,
"C": -8.55109977722168,
"D": -9.725017547607422,
"E": -9.788521766662598
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 0.9574871063232422,
"scores": {
"A": -11.895593643188477,
"B": -10.938106536865234,
"C": -13.63333511352539,
"D": -14.099959373474121,
"E": -13.749227523803711
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 0.9574871063232422,
"scores": {
"A": -11.895593643188477,
"B": -10.938106536865234,
"C": -13.63333511352539,
"D": -14.099959373474121,
"E": -13.749227523803711
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -1.307638168334961,
"scores": {
"A": -6.399979591369629,
"B": -7.70761775970459,
"C": -11.518457412719727,
"D": -11.318992614746094,
"E": -11.238313674926758
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 1.0426197052001953,
"scores": {
"A": -8.261636734008789,
"B": -7.219017028808594,
"C": -8.581559181213379,
"D": -9.666144371032715,
"E": -9.812707901000977
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -1.6853055953979492,
"scores": {
"A": -7.959956169128418,
"B": -9.645261764526367,
"C": -13.037406921386719,
"D": -12.513145446777344,
"E": -9.820344924926758
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -2.093125820159912,
"scores": {
"A": -6.434208393096924,
"B": -8.527334213256836,
"C": -11.775838851928711,
"D": -11.290367126464844,
"E": -12.324055671691895
}
}
},
{
"ex_id": "aqua-test-206",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.18168067932128906,
"scores": {
"A": -11.602930068969727,
"B": -10.961795806884766,
"C": -11.143476486206055,
"D": -12.837438583374023,
"E": -14.00632095336914
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.0302047729492188,
"scores": {
"A": -8.013933181762695,
"B": -11.044137954711914,
"C": -12.337331771850586,
"D": -11.77204704284668,
"E": -14.158761024475098
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 0.8052225112915039,
"scores": {
"A": -11.124311447143555,
"B": -10.31908893585205,
"C": -11.636249542236328,
"D": -12.520262718200684,
"E": -14.125991821289062
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 0.18168067932128906,
"scores": {
"A": -11.60293197631836,
"B": -10.961797714233398,
"C": -11.143478393554688,
"D": -12.837438583374023,
"E": -14.006319046020508
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 0.18168067932128906,
"scores": {
"A": -11.60293197631836,
"B": -10.961797714233398,
"C": -11.143478393554688,
"D": -12.837438583374023,
"E": -14.006319046020508
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -2.3860421180725098,
"scores": {
"A": -6.778487682342529,
"B": -9.164529800415039,
"C": -12.009851455688477,
"D": -12.049808502197266,
"E": -13.598691940307617
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 0.9240913391113281,
"scores": {
"A": -11.181600570678711,
"B": -10.257509231567383,
"C": -11.613018035888672,
"D": -12.530416488647461,
"E": -13.934183120727539
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -2.196244239807129,
"scores": {
"A": -7.369185447692871,
"B": -9.5654296875,
"C": -11.989535331726074,
"D": -11.94742488861084,
"E": -13.421416282653809
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -3.0302047729492188,
"scores": {
"A": -8.013933181762695,
"B": -11.044137954711914,
"C": -12.337331771850586,
"D": -11.772050857543945,
"E": -14.15876579284668
}
}
},
{
"ex_id": "aqua-test-212",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.515838623046875,
"scores": {
"A": -11.57960319519043,
"B": -9.604219436645508,
"C": -11.120058059692383,
"D": -11.739898681640625,
"E": -12.83167839050293
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.382899284362793,
"scores": {
"A": -4.981387138366699,
"B": -8.364286422729492,
"C": -11.265626907348633,
"D": -9.413225173950195,
"E": -11.893355369567871
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 1.4837512969970703,
"scores": {
"A": -9.99412727355957,
"B": -7.378774642944336,
"C": -8.862525939941406,
"D": -9.535578727722168,
"E": -9.807991981506348
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 1.5158329010009766,
"scores": {
"A": -11.579606056213379,
"B": -9.604227066040039,
"C": -11.120059967041016,
"D": -11.739900588989258,
"E": -12.831683158874512
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 1.5158329010009766,
"scores": {
"A": -11.579606056213379,
"B": -9.604227066040039,
"C": -11.120059967041016,
"D": -11.739900588989258,
"E": -12.831683158874512
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -3.0996170043945312,
"scores": {
"A": -5.803328514099121,
"B": -8.902945518493652,
"C": -12.642228126525879,
"D": -12.522542953491211,
"E": -14.157147407531738
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 1.5297069549560547,
"scores": {
"A": -10.015952110290527,
"B": -7.30755615234375,
"C": -8.837263107299805,
"D": -9.575658798217773,
"E": -9.859335899353027
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -2.132758140563965,
"scores": {
"A": -7.283851623535156,
"B": -9.416609764099121,
"C": -12.729516983032227,
"D": -10.042702674865723,
"E": -12.341903686523438
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -3.382904052734375,
"scores": {
"A": -4.981382369995117,
"B": -8.364286422729492,
"C": -11.26562213897705,
"D": -9.413222312927246,
"E": -11.893354415893555
}
}
},
{
"ex_id": "aqua-test-223",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 3.090752601623535,
"scores": {
"A": -10.285022735595703,
"B": -7.155424118041992,
"C": -10.246176719665527,
"D": -10.93359375,
"E": -11.335384368896484
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.092008590698242,
"scores": {
"A": -4.6826276779174805,
"B": -8.774636268615723,
"C": -12.371101379394531,
"D": -11.170863151550293,
"E": -13.101846694946289
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 2.5026049613952637,
"scores": {
"A": -10.422887802124023,
"B": -5.899902820587158,
"C": -8.402507781982422,
"D": -9.963022232055664,
"E": -10.305240631103516
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 3.0907530784606934,
"scores": {
"A": -10.285022735595703,
"B": -7.155422687530518,
"C": -10.246175765991211,
"D": -10.933595657348633,
"E": -11.335386276245117
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 3.0907530784606934,
"scores": {
"A": -10.285022735595703,
"B": -7.155422687530518,
"C": -10.246175765991211,
"D": -10.933595657348633,
"E": -11.335386276245117
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -2.352473258972168,
"scores": {
"A": -4.42826509475708,
"B": -6.780738353729248,
"C": -12.333539009094238,
"D": -11.84121036529541,
"E": -13.286885261535645
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 2.4003238677978516,
"scores": {
"A": -10.343778610229492,
"B": -5.7316436767578125,
"C": -8.131967544555664,
"D": -9.640758514404297,
"E": -10.208199501037598
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -3.1807565689086914,
"scores": {
"A": -5.085258483886719,
"B": -8.26601505279541,
"C": -12.408848762512207,
"D": -11.484879493713379,
"E": -12.500157356262207
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -4.092006683349609,
"scores": {
"A": -4.682626724243164,
"B": -8.774633407592773,
"C": -12.371101379394531,
"D": -11.17086410522461,
"E": -13.101846694946289
}
}
},
{
"ex_id": "aqua-test-228",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.06908798217773438,
"scores": {
"A": -13.294260025024414,
"B": -10.70706558227539,
"C": -10.776153564453125,
"D": -14.082728385925293,
"E": -14.882830619812012
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.2796850204467773,
"scores": {
"A": -7.156650543212891,
"B": -8.436335563659668,
"C": -9.495584487915039,
"D": -10.117116928100586,
"E": -8.917889595031738
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 0.7942695617675781,
"scores": {
"A": -8.922301292419434,
"B": -6.567187309265137,
"C": -7.361456871032715,
"D": -9.64250659942627,
"E": -9.15162181854248
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 0.06908607482910156,
"scores": {
"A": -13.294256210327148,
"B": -10.70706558227539,
"C": -10.776151657104492,
"D": -14.082728385925293,
"E": -14.882831573486328
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 0.06908607482910156,
"scores": {
"A": -13.294256210327148,
"B": -10.70706558227539,
"C": -10.776151657104492,
"D": -14.082728385925293,
"E": -14.882831573486328
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -1.0041084289550781,
"scores": {
"A": -5.43384313583374,
"B": -6.437951564788818,
"C": -6.356510639190674,
"D": -9.189103126525879,
"E": -8.109259605407715
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 0.8438491821289062,
"scores": {
"A": -8.9219388961792,
"B": -6.8384809494018555,
"C": -7.682330131530762,
"D": -9.85693073272705,
"E": -9.25981616973877
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -0.8855304718017578,
"scores": {
"A": -6.823153495788574,
"B": -7.708683967590332,
"C": -9.504840850830078,
"D": -8.854315757751465,
"E": -7.114500999450684
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -1.2796845436096191,
"scores": {
"A": -7.156657695770264,
"B": -8.436342239379883,
"C": -9.495588302612305,
"D": -10.117122650146484,
"E": -8.917900085449219
}
}
},
{
"ex_id": "aqua-test-249",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.821441650390625,
"scores": {
"A": -10.321834564208984,
"B": -8.848502159118652,
"C": -8.027060508728027,
"D": -11.628623962402344,
"E": -11.091792106628418
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.602280616760254,
"scores": {
"A": -6.007650375366211,
"B": -7.634098052978516,
"C": -8.609930992126465,
"D": -8.443798065185547,
"E": -8.685563087463379
}
},
"patched_0": {
"pred_label": "B",
"correct": false,
"margin": -0.3504180908203125,
"scores": {
"A": -8.986002922058105,
"B": -6.979213714599609,
"C": -7.329631805419922,
"D": -10.10708236694336,
"E": -9.747851371765137
}
},
"patched_01": {
"pred_label": "C",
"correct": true,
"margin": 0.8214454650878906,
"scores": {
"A": -10.321839332580566,
"B": -8.848505973815918,
"C": -8.027060508728027,
"D": -11.628629684448242,
"E": -11.091798782348633
}
},
"patched_full": {
"pred_label": "C",
"correct": true,
"margin": 0.8214454650878906,
"scores": {
"A": -10.321839332580566,
"B": -8.848505973815918,
"C": -8.027060508728027,
"D": -11.628629684448242,
"E": -11.091798782348633
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -2.610713005065918,
"scores": {
"A": -5.542219161987305,
"B": -7.004818916320801,
"C": -8.152932167053223,
"D": -8.9476957321167,
"E": -9.82064151763916
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": false,
"margin": -0.41672515869140625,
"scores": {
"A": -8.892544746398926,
"B": -6.766074180603027,
"C": -7.182799339294434,
"D": -9.836996078491211,
"E": -9.51023006439209
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -2.0196499824523926,
"scores": {
"A": -3.997561454772949,
"B": -5.764638423919678,
"C": -6.017211437225342,
"D": -5.872786045074463,
"E": -6.263749599456787
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -2.602281093597412,
"scores": {
"A": -6.007654666900635,
"B": -7.634103298187256,
"C": -8.609935760498047,
"D": -8.443801879882812,
"E": -8.685571670532227
}
}
},
{
"ex_id": "aqua-test-251",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.07524585723876953,
"scores": {
"A": -9.908409118652344,
"B": -9.833163261413574,
"C": -12.424334526062012,
"D": -11.275071144104004,
"E": -10.72103214263916
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.978281021118164,
"scores": {
"A": -9.318894386291504,
"B": -12.297175407409668,
"C": -13.513100624084473,
"D": -12.114720344543457,
"E": -11.161179542541504
}
},
"patched_0": {
"pred_label": "B",
"correct": true,
"margin": 0.53338623046875,
"scores": {
"A": -9.37672233581543,
"B": -8.81983757019043,
"C": -10.16126823425293,
"D": -10.652963638305664,
"E": -9.35322380065918
}
},
"patched_01": {
"pred_label": "B",
"correct": true,
"margin": 0.07524681091308594,
"scores": {
"A": -9.90841007232666,
"B": -9.833163261413574,
"C": -12.424333572387695,
"D": -11.27507209777832,
"E": -10.72103500366211
}
},
"patched_full": {
"pred_label": "B",
"correct": true,
"margin": 0.07524681091308594,
"scores": {
"A": -9.90841007232666,
"B": -9.833163261413574,
"C": -12.424333572387695,
"D": -11.27507209777832,
"E": -10.72103500366211
}
},
"debug_max_abs_diff_patched01_vs_full": 0.0,
"control_rand_subspace": {
"pred_label": "A",
"correct": false,
"margin": -2.792086601257324,
"scores": {
"A": -7.997702598571777,
"B": -10.789789199829102,
"C": -12.683808326721191,
"D": -10.730910301208496,
"E": -9.873717308044434
}
},
"control_time_shuffled": {
"pred_label": "B",
"correct": true,
"margin": 0.42225170135498047,
"scores": {
"A": -9.392989158630371,
"B": -8.97073745727539,
"C": -10.319923400878906,
"D": -10.757732391357422,
"E": -9.409377098083496
}
},
"control_shared_randvec": {
"pred_label": "A",
"correct": false,
"margin": -2.0303096771240234,
"scores": {
"A": -10.767759323120117,
"B": -12.79806900024414,
"C": -13.778217315673828,
"D": -12.801124572753906,
"E": -11.79677963256836
}
},
"control_patch_nonshared": {
"pred_label": "A",
"correct": false,
"margin": -2.978278160095215,
"scores": {
"A": -9.31889533996582,
"B": -12.297173500061035,
"C": -13.513103485107422,
"D": -12.114721298217773,
"E": -11.16118049621582
}
}
}
]
}