decodeshare / artifacts /patch_back /results /flipset_sweep_transfer.json
Zishan-Shao's picture
Upload folder using huggingface_hub
aa0e435 verified
{
"meta": {
"model": "meta-llama/Llama-2-7b-chat-hf",
"device": "cuda",
"dtype": "fp16",
"layer": 10,
"layers_path": "model.layers",
"task": "aqua",
"eval_meta": {
"subspace_split": null,
"eval_split": "test",
"available_splits": [
"train",
"test",
"validation"
],
"hf_id": "aqua_rat",
"options_prefix_stripped": true,
"force_answer_prefix": true
},
"seed": 123,
"candidate_labels": [
"A",
"B",
"C",
"D",
"E"
],
"candidate_text_style": "space_letter",
"candidate_token_lens": {
"A": 2,
"B": 2,
"C": 2,
"D": 2,
"E": 2
},
"max_candidate_token_len": 2,
"Qs_path": "Q_shared_layer10.npy",
"Qs_shape": [
4096,
97
],
"flipset_definition": {
"alpha": 1.0,
"criterion": "baseline correct AND ablated(alpha=1) wrong",
"n_eval_loaded": 254,
"flipset_total": 42,
"flipset_used": 42
},
"alpha_sweep": {
"enabled": true,
"alpha_list": [
0.0,
0.05,
0.1,
0.2,
0.5,
1.0
]
},
"transfer_patching": {
"enabled": true,
"patch_window_requested": "steps_0",
"patch_steps_requested": [
0
],
"patch_steps_final": [
0
],
"run_self_patch_ref": true
}
},
"scan_rows": [
{
"ex_id": "aqua-test-0",
"gold": "B",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -0.5390625,
"scores": {
"A": -9.953125,
"B": -9.9296875,
"C": -9.390625,
"D": -11.7421875,
"E": -11.375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.2421875,
"scores": {
"A": -9.1796875,
"B": -10.421875,
"C": -11.0546875,
"D": -11.21875,
"E": -11.1171875
}
}
},
{
"ex_id": "aqua-test-1",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.0234375,
"scores": {
"A": -12.90625,
"B": -11.53125,
"C": -11.5546875,
"D": -13.53125,
"E": -13.296875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -5.46875,
"scores": {
"A": -6.5625,
"B": -11.109375,
"C": -12.03125,
"D": -11.03125,
"E": -13.0
}
}
},
{
"ex_id": "aqua-test-2",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.2578125,
"scores": {
"A": -11.234375,
"B": -10.2109375,
"C": -13.171875,
"D": -12.4453125,
"E": -10.46875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -7.953125,
"scores": {
"A": -6.0625,
"B": -14.015625,
"C": -17.125,
"D": -15.2734375,
"E": -15.640625
}
}
},
{
"ex_id": "aqua-test-3",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.1171875,
"scores": {
"A": -12.6796875,
"B": -8.5078125,
"C": -9.625,
"D": -12.859375,
"E": -15.15625
}
},
"ablated_1": {
"pred_label": "B",
"correct": false,
"margin": -1.0546875,
"scores": {
"A": -9.75,
"B": -9.1875,
"C": -10.2421875,
"D": -11.046875,
"E": -10.8984375
}
}
},
{
"ex_id": "aqua-test-4",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 0.8984375,
"scores": {
"A": -10.921875,
"B": -11.8203125,
"C": -13.1171875,
"D": -12.671875,
"E": -12.2578125
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 1.8125,
"scores": {
"A": -8.171875,
"B": -10.75,
"C": -11.90625,
"D": -9.984375,
"E": -10.890625
}
}
},
{
"ex_id": "aqua-test-5",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.953125,
"scores": {
"A": -11.9921875,
"B": -10.9765625,
"C": -12.0390625,
"D": -11.9609375,
"E": -11.9296875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.1640625,
"scores": {
"A": -7.59375,
"B": -9.7578125,
"C": -11.0234375,
"D": -9.1953125,
"E": -10.0625
}
}
},
{
"ex_id": "aqua-test-6",
"gold": "C",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -2.40625,
"scores": {
"A": -10.84375,
"B": -12.203125,
"C": -13.25,
"D": -14.3125,
"E": -12.21875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -5.515625,
"scores": {
"A": -7.015625,
"B": -9.8671875,
"C": -12.53125,
"D": -11.03125,
"E": -11.0078125
}
}
},
{
"ex_id": "aqua-test-7",
"gold": "D",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.171875,
"scores": {
"A": -10.5625,
"B": -9.359375,
"C": -8.96875,
"D": -11.140625,
"E": -10.96875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.88671875,
"scores": {
"A": -6.71484375,
"B": -10.6875,
"C": -10.5546875,
"D": -10.6015625,
"E": -12.171875
}
}
},
{
"ex_id": "aqua-test-8",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.28125,
"scores": {
"A": -13.03125,
"B": -11.890625,
"C": -14.171875,
"D": -12.390625,
"E": -14.484375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -8.05859375,
"scores": {
"A": -6.81640625,
"B": -11.90625,
"C": -14.875,
"D": -11.125,
"E": -12.953125
}
}
},
{
"ex_id": "aqua-test-9",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.078125,
"scores": {
"A": -11.265625,
"B": -8.890625,
"C": -9.96875,
"D": -12.359375,
"E": -13.9921875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.01953125,
"scores": {
"A": -7.32421875,
"B": -11.34375,
"C": -11.5,
"D": -13.6875,
"E": -15.28125
}
}
},
{
"ex_id": "aqua-test-10",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.21875,
"scores": {
"A": -11.90625,
"B": -10.2421875,
"C": -13.296875,
"D": -11.453125,
"E": -10.4609375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -7.09375,
"scores": {
"A": -7.1796875,
"B": -11.0390625,
"C": -14.765625,
"D": -12.578125,
"E": -14.2734375
}
}
},
{
"ex_id": "aqua-test-11",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.0390625,
"scores": {
"A": -11.6875,
"B": -10.359375,
"C": -12.546875,
"D": -12.5859375,
"E": -12.3984375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.6796875,
"scores": {
"A": -6.9609375,
"B": -10.3046875,
"C": -13.40625,
"D": -11.125,
"E": -13.640625
}
}
},
{
"ex_id": "aqua-test-12",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.484375,
"scores": {
"A": -12.78125,
"B": -9.015625,
"C": -11.5,
"D": -11.5625,
"E": -13.0
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.1875,
"scores": {
"A": -8.7890625,
"B": -9.625,
"C": -11.9765625,
"D": -8.7890625,
"E": -11.3984375
}
}
},
{
"ex_id": "aqua-test-13",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.7890625,
"scores": {
"A": -12.8515625,
"B": -9.4609375,
"C": -9.546875,
"D": -12.25,
"E": -12.8671875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.83984375,
"scores": {
"A": -6.23828125,
"B": -9.609375,
"C": -8.1015625,
"D": -10.078125,
"E": -13.046875
}
}
},
{
"ex_id": "aqua-test-14",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.625,
"scores": {
"A": -12.59375,
"B": -9.078125,
"C": -10.5390625,
"D": -10.703125,
"E": -9.5078125
}
},
"ablated_1": {
"pred_label": "D",
"correct": true,
"margin": 0.44921875,
"scores": {
"A": -7.1328125,
"B": -8.40625,
"C": -9.1015625,
"D": -6.68359375,
"E": -8.6953125
}
}
},
{
"ex_id": "aqua-test-15",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.671875,
"scores": {
"A": -11.078125,
"B": -10.40625,
"C": -13.625,
"D": -15.3125,
"E": -13.8125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.7421875,
"scores": {
"A": -6.1171875,
"B": -10.859375,
"C": -11.296875,
"D": -11.0625,
"E": -13.578125
}
}
},
{
"ex_id": "aqua-test-16",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 2.80078125,
"scores": {
"A": -12.484375,
"B": -10.515625,
"C": -7.71484375,
"D": -12.859375,
"E": -12.8125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -0.87109375,
"scores": {
"A": -7.82421875,
"B": -9.453125,
"C": -8.6953125,
"D": -9.59375,
"E": -11.6953125
}
}
},
{
"ex_id": "aqua-test-17",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.0625,
"scores": {
"A": -11.5625,
"B": -10.5,
"C": -12.515625,
"D": -12.8125,
"E": -12.875
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 4.5,
"scores": {
"A": -7.1796875,
"B": -12.2578125,
"C": -14.125,
"D": -11.6796875,
"E": -15.5078125
}
}
},
{
"ex_id": "aqua-test-18",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.3984375,
"scores": {
"A": -11.515625,
"B": -10.9765625,
"C": -13.96875,
"D": -12.375,
"E": -12.171875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.6171875,
"scores": {
"A": -7.203125,
"B": -10.859375,
"C": -14.8125,
"D": -11.8203125,
"E": -15.4921875
}
}
},
{
"ex_id": "aqua-test-19",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -0.78125,
"scores": {
"A": -9.515625,
"B": -9.0625,
"C": -8.734375,
"D": -10.546875,
"E": -11.125
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 3.40234375,
"scores": {
"A": -5.53515625,
"B": -9.8203125,
"C": -8.9375,
"D": -10.78125,
"E": -11.8828125
}
}
},
{
"ex_id": "aqua-test-20",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.84375,
"scores": {
"A": -10.1875,
"B": -9.6875,
"C": -9.8125,
"D": -11.3125,
"E": -11.53125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -7.8046875,
"scores": {
"A": -6.5234375,
"B": -9.34375,
"C": -12.984375,
"D": -10.1328125,
"E": -14.328125
}
}
},
{
"ex_id": "aqua-test-21",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.765625,
"scores": {
"A": -10.4140625,
"B": -9.6484375,
"C": -12.5546875,
"D": -12.234375,
"E": -11.3828125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.5390625,
"scores": {
"A": -6.1328125,
"B": -9.671875,
"C": -10.8046875,
"D": -10.671875,
"E": -11.2109375
}
}
},
{
"ex_id": "aqua-test-22",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.96875,
"scores": {
"A": -9.7734375,
"B": -8.546875,
"C": -11.7734375,
"D": -12.625,
"E": -11.515625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -7.88671875,
"scores": {
"A": -6.34765625,
"B": -9.96875,
"C": -12.5625,
"D": -13.21875,
"E": -14.234375
}
}
},
{
"ex_id": "aqua-test-23",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.125,
"scores": {
"A": -9.890625,
"B": -8.765625,
"C": -10.03125,
"D": -11.890625,
"E": -10.671875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.6796875,
"scores": {
"A": -6.0859375,
"B": -10.3125,
"C": -12.046875,
"D": -12.765625,
"E": -13.46875
}
}
},
{
"ex_id": "aqua-test-24",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.5390625,
"scores": {
"A": -11.6328125,
"B": -10.71875,
"C": -13.28125,
"D": -11.4296875,
"E": -11.2578125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -0.109375,
"scores": {
"A": -7.28125,
"B": -7.6171875,
"C": -9.4375,
"D": -7.8515625,
"E": -7.390625
}
}
},
{
"ex_id": "aqua-test-25",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.0546875,
"scores": {
"A": -12.953125,
"B": -12.2578125,
"C": -12.203125,
"D": -12.4140625,
"E": -13.6328125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.5625,
"scores": {
"A": -8.6484375,
"B": -10.8828125,
"C": -10.2109375,
"D": -9.4609375,
"E": -10.859375
}
}
},
{
"ex_id": "aqua-test-26",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -3.8125,
"scores": {
"A": -12.578125,
"B": -10.75,
"C": -8.765625,
"D": -12.953125,
"E": -10.875
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 0.6015625,
"scores": {
"A": -7.703125,
"B": -10.7890625,
"C": -8.3046875,
"D": -9.640625,
"E": -11.390625
}
}
},
{
"ex_id": "aqua-test-27",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.8125,
"scores": {
"A": -10.90625,
"B": -9.609375,
"C": -10.671875,
"D": -12.421875,
"E": -12.0
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.328125,
"scores": {
"A": -8.1640625,
"B": -10.15625,
"C": -11.375,
"D": -10.4921875,
"E": -11.28125
}
}
},
{
"ex_id": "aqua-test-28",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 1.3359375,
"scores": {
"A": -10.4296875,
"B": -11.765625,
"C": -12.7265625,
"D": -13.0390625,
"E": -13.0546875
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 4.234375,
"scores": {
"A": -9.5234375,
"B": -13.7578125,
"C": -14.6015625,
"D": -14.578125,
"E": -13.765625
}
}
},
{
"ex_id": "aqua-test-29",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.6875,
"scores": {
"A": -12.921875,
"B": -10.234375,
"C": -10.7578125,
"D": -10.984375,
"E": -12.3828125
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 1.3828125,
"scores": {
"A": -8.4296875,
"B": -10.6015625,
"C": -11.890625,
"D": -9.8125,
"E": -11.0
}
}
},
{
"ex_id": "aqua-test-30",
"gold": "D",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -4.3203125,
"scores": {
"A": -11.671875,
"B": -11.4140625,
"C": -11.09375,
"D": -15.4140625,
"E": -11.8359375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.8203125,
"scores": {
"A": -6.5546875,
"B": -12.5703125,
"C": -14.359375,
"D": -13.375,
"E": -12.265625
}
}
},
{
"ex_id": "aqua-test-31",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.5078125,
"scores": {
"A": -10.9765625,
"B": -9.859375,
"C": -10.3671875,
"D": -10.8515625,
"E": -11.3671875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -5.296875,
"scores": {
"A": -6.6328125,
"B": -10.09375,
"C": -11.9296875,
"D": -10.46875,
"E": -9.6484375
}
}
},
{
"ex_id": "aqua-test-32",
"gold": "B",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -0.453125,
"scores": {
"A": -12.71875,
"B": -10.984375,
"C": -10.53125,
"D": -12.484375,
"E": -11.609375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.62109375,
"scores": {
"A": -6.75390625,
"B": -10.375,
"C": -10.203125,
"D": -12.796875,
"E": -13.765625
}
}
},
{
"ex_id": "aqua-test-33",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 1.1875,
"scores": {
"A": -17.28125,
"B": -18.1875,
"C": -16.09375,
"D": -19.15625,
"E": -19.46875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -0.4375,
"scores": {
"A": -9.125,
"B": -10.1171875,
"C": -9.5625,
"D": -10.0703125,
"E": -10.4921875
}
}
},
{
"ex_id": "aqua-test-34",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.9375,
"scores": {
"A": -14.8359375,
"B": -11.34375,
"C": -14.7109375,
"D": -14.28125,
"E": -15.34375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -5.2890625,
"scores": {
"A": -9.546875,
"B": -11.46875,
"C": -14.8046875,
"D": -14.8359375,
"E": -18.359375
}
}
},
{
"ex_id": "aqua-test-35",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.046875,
"scores": {
"A": -11.9375,
"B": -10.578125,
"C": -11.625,
"D": -11.265625,
"E": -11.609375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.0078125,
"scores": {
"A": -8.0625,
"B": -10.21875,
"C": -10.0703125,
"D": -9.2890625,
"E": -9.390625
}
}
},
{
"ex_id": "aqua-test-36",
"gold": "E",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -1.734375,
"scores": {
"A": -9.8984375,
"B": -10.4921875,
"C": -10.7421875,
"D": -12.6953125,
"E": -11.6328125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.5546875,
"scores": {
"A": -8.3046875,
"B": -12.734375,
"C": -12.671875,
"D": -14.5625,
"E": -14.859375
}
}
},
{
"ex_id": "aqua-test-37",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.0,
"scores": {
"A": -11.25,
"B": -10.5078125,
"C": -13.328125,
"D": -12.7734375,
"E": -11.5078125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -0.84375,
"scores": {
"A": -9.515625,
"B": -10.703125,
"C": -12.234375,
"D": -10.921875,
"E": -10.359375
}
}
},
{
"ex_id": "aqua-test-38",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.59375,
"scores": {
"A": -13.921875,
"B": -12.09375,
"C": -17.015625,
"D": -16.796875,
"E": -13.6875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.6328125,
"scores": {
"A": -8.9765625,
"B": -11.46875,
"C": -14.421875,
"D": -12.515625,
"E": -12.609375
}
}
},
{
"ex_id": "aqua-test-39",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 1.71875,
"scores": {
"A": -10.2265625,
"B": -11.9453125,
"C": -12.1484375,
"D": -14.3125,
"E": -14.015625
}
},
"ablated_1": {
"pred_label": "C",
"correct": false,
"margin": -0.390625,
"scores": {
"A": -10.234375,
"B": -10.1875,
"C": -9.84375,
"D": -11.59375,
"E": -10.8515625
}
}
},
{
"ex_id": "aqua-test-40",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.1875,
"scores": {
"A": -9.90625,
"B": -8.71875,
"C": -8.9375,
"D": -11.40625,
"E": -10.3125
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 2.8828125,
"scores": {
"A": -6.9609375,
"B": -10.8671875,
"C": -10.4765625,
"D": -9.84375,
"E": -9.953125
}
}
},
{
"ex_id": "aqua-test-41",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.484375,
"scores": {
"A": -13.234375,
"B": -12.765625,
"C": -14.671875,
"D": -16.25,
"E": -16.5
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.0859375,
"scores": {
"A": -8.6328125,
"B": -9.375,
"C": -10.65625,
"D": -10.71875,
"E": -10.390625
}
}
},
{
"ex_id": "aqua-test-42",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.96875,
"scores": {
"A": -14.828125,
"B": -12.5,
"C": -12.890625,
"D": -13.46875,
"E": -14.859375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.40234375,
"scores": {
"A": -6.67578125,
"B": -11.125,
"C": -12.828125,
"D": -10.078125,
"E": -12.3203125
}
}
},
{
"ex_id": "aqua-test-43",
"gold": "D",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -0.734375,
"scores": {
"A": -12.140625,
"B": -11.703125,
"C": -11.390625,
"D": -12.125,
"E": -13.984375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.8125,
"scores": {
"A": -5.0,
"B": -7.875,
"C": -9.0546875,
"D": -6.8125,
"E": -9.6015625
}
}
},
{
"ex_id": "aqua-test-44",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 0.390625,
"scores": {
"A": -10.1171875,
"B": -10.5078125,
"C": -10.5078125,
"D": -12.1953125,
"E": -15.0
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 3.46875,
"scores": {
"A": -7.4375,
"B": -11.4375,
"C": -14.21875,
"D": -10.90625,
"E": -14.3984375
}
}
},
{
"ex_id": "aqua-test-45",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -1.0546875,
"scores": {
"A": -12.984375,
"B": -12.3359375,
"C": -11.9296875,
"D": -12.359375,
"E": -12.4921875
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 2.30078125,
"scores": {
"A": -7.07421875,
"B": -9.375,
"C": -10.578125,
"D": -9.984375,
"E": -12.9375
}
}
},
{
"ex_id": "aqua-test-46",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.796875,
"scores": {
"A": -12.9609375,
"B": -11.390625,
"C": -11.625,
"D": -11.90625,
"E": -12.1875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.734375,
"scores": {
"A": -7.03125,
"B": -8.359375,
"C": -10.5,
"D": -8.4453125,
"E": -8.765625
}
}
},
{
"ex_id": "aqua-test-47",
"gold": "E",
"baseline": {
"pred_label": "E",
"correct": true,
"margin": 0.203125,
"scores": {
"A": -11.9453125,
"B": -12.5,
"C": -12.1171875,
"D": -13.046875,
"E": -11.7421875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.0390625,
"scores": {
"A": -9.3828125,
"B": -11.8515625,
"C": -13.359375,
"D": -12.15625,
"E": -13.421875
}
}
},
{
"ex_id": "aqua-test-48",
"gold": "E",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.296875,
"scores": {
"A": -14.109375,
"B": -14.640625,
"C": -9.0625,
"D": -10.3125,
"E": -11.359375
}
},
"ablated_1": {
"pred_label": "C",
"correct": false,
"margin": -1.5078125,
"scores": {
"A": -7.671875,
"B": -8.9296875,
"C": -6.4140625,
"D": -7.8203125,
"E": -7.921875
}
}
},
{
"ex_id": "aqua-test-49",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -4.125,
"scores": {
"A": -15.953125,
"B": -11.828125,
"C": -14.90625,
"D": -13.234375,
"E": -13.984375
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 1.09375,
"scores": {
"A": -8.953125,
"B": -10.046875,
"C": -13.53125,
"D": -10.265625,
"E": -12.515625
}
}
},
{
"ex_id": "aqua-test-50",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.2890625,
"scores": {
"A": -9.515625,
"B": -8.5703125,
"C": -9.984375,
"D": -10.859375,
"E": -10.7265625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.7578125,
"scores": {
"A": -7.0546875,
"B": -10.078125,
"C": -12.5625,
"D": -10.8125,
"E": -13.34375
}
}
},
{
"ex_id": "aqua-test-51",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.25,
"scores": {
"A": -11.4375,
"B": -9.515625,
"C": -9.6171875,
"D": -12.765625,
"E": -11.96875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -5.03125,
"scores": {
"A": -5.234375,
"B": -8.6015625,
"C": -12.21875,
"D": -10.265625,
"E": -11.484375
}
}
},
{
"ex_id": "aqua-test-52",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.0625,
"scores": {
"A": -12.890625,
"B": -9.8515625,
"C": -9.9140625,
"D": -11.515625,
"E": -10.6875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -0.4765625,
"scores": {
"A": -5.1328125,
"B": -5.609375,
"C": -6.609375,
"D": -6.8984375,
"E": -6.4296875
}
}
},
{
"ex_id": "aqua-test-53",
"gold": "D",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -4.0234375,
"scores": {
"A": -12.7109375,
"B": -12.4140625,
"C": -8.390625,
"D": -12.4140625,
"E": -13.4609375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.78125,
"scores": {
"A": -4.9375,
"B": -8.796875,
"C": -8.203125,
"D": -9.71875,
"E": -10.1171875
}
}
},
{
"ex_id": "aqua-test-54",
"gold": "D",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.21875,
"scores": {
"A": -12.4140625,
"B": -12.7578125,
"C": -11.3046875,
"D": -13.5234375,
"E": -11.6484375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -5.5625,
"scores": {
"A": -7.515625,
"B": -12.953125,
"C": -11.71875,
"D": -13.078125,
"E": -12.3203125
}
}
},
{
"ex_id": "aqua-test-55",
"gold": "B",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -0.859375,
"scores": {
"A": -10.5,
"B": -10.5703125,
"C": -9.7109375,
"D": -13.1640625,
"E": -11.4921875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.265625,
"scores": {
"A": -8.015625,
"B": -10.28125,
"C": -11.234375,
"D": -11.296875,
"E": -13.421875
}
}
},
{
"ex_id": "aqua-test-56",
"gold": "D",
"baseline": {
"pred_label": "E",
"correct": false,
"margin": -1.5703125,
"scores": {
"A": -10.5859375,
"B": -11.296875,
"C": -12.3671875,
"D": -12.1328125,
"E": -10.5625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.2734375,
"scores": {
"A": -7.0625,
"B": -8.96875,
"C": -10.53125,
"D": -8.3359375,
"E": -9.71875
}
}
},
{
"ex_id": "aqua-test-57",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.15625,
"scores": {
"A": -13.875,
"B": -12.96875,
"C": -14.359375,
"D": -14.140625,
"E": -13.125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.640625,
"scores": {
"A": -10.3125,
"B": -12.953125,
"C": -12.8203125,
"D": -12.8359375,
"E": -12.5078125
}
}
},
{
"ex_id": "aqua-test-58",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.4296875,
"scores": {
"A": -15.53125,
"B": -10.21875,
"C": -13.6484375,
"D": -15.59375,
"E": -14.75
}
},
"ablated_1": {
"pred_label": "B",
"correct": false,
"margin": -6.56640625,
"scores": {
"A": -10.90625,
"B": -7.32421875,
"C": -13.890625,
"D": -11.015625,
"E": -13.09375
}
}
},
{
"ex_id": "aqua-test-59",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.03125,
"scores": {
"A": -9.5546875,
"B": -8.5234375,
"C": -9.390625,
"D": -11.921875,
"E": -12.15625
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 2.609375,
"scores": {
"A": -7.515625,
"B": -10.125,
"C": -10.609375,
"D": -12.1328125,
"E": -13.125
}
}
},
{
"ex_id": "aqua-test-60",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -4.2734375,
"scores": {
"A": -9.9921875,
"B": -9.9765625,
"C": -10.4921875,
"D": -14.25,
"E": -14.8125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.6796875,
"scores": {
"A": -5.640625,
"B": -9.828125,
"C": -11.71875,
"D": -12.3203125,
"E": -13.484375
}
}
},
{
"ex_id": "aqua-test-61",
"gold": "E",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -1.4921875,
"scores": {
"A": -11.6875,
"B": -13.140625,
"C": -15.5078125,
"D": -12.734375,
"E": -13.1796875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.265625,
"scores": {
"A": -7.3671875,
"B": -8.8828125,
"C": -12.46875,
"D": -8.6640625,
"E": -9.6328125
}
}
},
{
"ex_id": "aqua-test-62",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -4.09375,
"scores": {
"A": -12.8203125,
"B": -9.875,
"C": -8.7265625,
"D": -13.2578125,
"E": -14.109375
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 2.9140625,
"scores": {
"A": -5.859375,
"B": -8.7734375,
"C": -8.9140625,
"D": -11.6953125,
"E": -13.875
}
}
},
{
"ex_id": "aqua-test-63",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.8203125,
"scores": {
"A": -12.9921875,
"B": -11.171875,
"C": -12.375,
"D": -15.0625,
"E": -14.8125
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 6.546875,
"scores": {
"A": -9.703125,
"B": -16.796875,
"C": -17.53125,
"D": -16.25,
"E": -17.90625
}
}
},
{
"ex_id": "aqua-test-64",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.109375,
"scores": {
"A": -11.2578125,
"B": -9.859375,
"C": -9.96875,
"D": -11.4765625,
"E": -13.296875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.38671875,
"scores": {
"A": -7.83203125,
"B": -8.5,
"C": -10.21875,
"D": -9.90625,
"E": -10.09375
}
}
},
{
"ex_id": "aqua-test-65",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.8125,
"scores": {
"A": -11.46875,
"B": -10.65625,
"C": -11.546875,
"D": -11.28125,
"E": -12.859375
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 2.3515625,
"scores": {
"A": -8.5,
"B": -10.8515625,
"C": -13.46875,
"D": -12.703125,
"E": -12.703125
}
}
},
{
"ex_id": "aqua-test-66",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.1328125,
"scores": {
"A": -12.5234375,
"B": -10.875,
"C": -12.2421875,
"D": -11.890625,
"E": -13.0078125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.2734375,
"scores": {
"A": -8.6328125,
"B": -10.921875,
"C": -14.421875,
"D": -10.703125,
"E": -11.90625
}
}
},
{
"ex_id": "aqua-test-67",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.359375,
"scores": {
"A": -8.828125,
"B": -8.546875,
"C": -9.703125,
"D": -9.90625,
"E": -10.28125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.71875,
"scores": {
"A": -6.6328125,
"B": -11.28125,
"C": -13.203125,
"D": -11.3515625,
"E": -13.6015625
}
}
},
{
"ex_id": "aqua-test-68",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.6875,
"scores": {
"A": -11.65625,
"B": -10.96875,
"C": -11.875,
"D": -12.078125,
"E": -12.640625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.1953125,
"scores": {
"A": -9.03125,
"B": -11.2265625,
"C": -11.265625,
"D": -11.5234375,
"E": -10.6171875
}
}
},
{
"ex_id": "aqua-test-69",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.5,
"scores": {
"A": -12.7578125,
"B": -10.59375,
"C": -11.09375,
"D": -14.09375,
"E": -13.5859375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.9453125,
"scores": {
"A": -6.21875,
"B": -9.140625,
"C": -13.1640625,
"D": -11.9453125,
"E": -11.6796875
}
}
},
{
"ex_id": "aqua-test-70",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.75,
"scores": {
"A": -12.375,
"B": -8.625,
"C": -13.84375,
"D": -10.7578125,
"E": -13.421875
}
},
"ablated_1": {
"pred_label": "B",
"correct": false,
"margin": -0.03125,
"scores": {
"A": -7.08203125,
"B": -7.05078125,
"C": -11.171875,
"D": -7.4140625,
"E": -10.75
}
}
},
{
"ex_id": "aqua-test-71",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -3.890625,
"scores": {
"A": -14.125,
"B": -15.40625,
"C": -10.234375,
"D": -13.78125,
"E": -11.53125
}
},
"ablated_1": {
"pred_label": "E",
"correct": false,
"margin": -0.4140625,
"scores": {
"A": -8.5703125,
"B": -12.78125,
"C": -9.265625,
"D": -9.6484375,
"E": -8.15625
}
}
},
{
"ex_id": "aqua-test-72",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.359375,
"scores": {
"A": -14.15625,
"B": -12.328125,
"C": -13.015625,
"D": -13.453125,
"E": -13.6875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.46875,
"scores": {
"A": -8.6796875,
"B": -9.1640625,
"C": -10.9140625,
"D": -9.5625,
"E": -10.1484375
}
}
},
{
"ex_id": "aqua-test-73",
"gold": "E",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -6.7265625,
"scores": {
"A": -15.75,
"B": -13.71875,
"C": -10.2265625,
"D": -14.84375,
"E": -16.953125
}
},
"ablated_1": {
"pred_label": "C",
"correct": false,
"margin": -4.62109375,
"scores": {
"A": -7.96875,
"B": -8.640625,
"C": -5.91015625,
"D": -10.078125,
"E": -10.53125
}
}
},
{
"ex_id": "aqua-test-74",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -1.40625,
"scores": {
"A": -12.734375,
"B": -11.671875,
"C": -11.328125,
"D": -13.28125,
"E": -14.6875
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 0.25,
"scores": {
"A": -7.734375,
"B": -9.125,
"C": -7.984375,
"D": -8.2421875,
"E": -8.78125
}
}
},
{
"ex_id": "aqua-test-75",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.4375,
"scores": {
"A": -12.078125,
"B": -8.640625,
"C": -10.578125,
"D": -12.375,
"E": -13.671875
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 4.6328125,
"scores": {
"A": -6.7578125,
"B": -11.390625,
"C": -13.0234375,
"D": -11.796875,
"E": -13.3125
}
}
},
{
"ex_id": "aqua-test-76",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.5703125,
"scores": {
"A": -13.40625,
"B": -11.0234375,
"C": -11.59375,
"D": -15.15625,
"E": -14.484375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.0078125,
"scores": {
"A": -10.6640625,
"B": -12.421875,
"C": -12.671875,
"D": -12.859375,
"E": -14.03125
}
}
},
{
"ex_id": "aqua-test-77",
"gold": "D",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.890625,
"scores": {
"A": -12.28125,
"B": -10.5,
"C": -9.2265625,
"D": -12.1171875,
"E": -14.09375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -5.109375,
"scores": {
"A": -6.25,
"B": -8.390625,
"C": -11.625,
"D": -11.359375,
"E": -12.09375
}
}
},
{
"ex_id": "aqua-test-78",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 3.078125,
"scores": {
"A": -12.7890625,
"B": -8.3203125,
"C": -11.3984375,
"D": -13.765625,
"E": -13.84375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.63671875,
"scores": {
"A": -5.62890625,
"B": -10.265625,
"C": -11.3125,
"D": -12.078125,
"E": -12.4296875
}
}
},
{
"ex_id": "aqua-test-79",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.65625,
"scores": {
"A": -11.9375,
"B": -11.28125,
"C": -12.421875,
"D": -11.671875,
"E": -12.890625
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 1.09375,
"scores": {
"A": -7.03125,
"B": -8.3359375,
"C": -10.1875,
"D": -8.125,
"E": -9.96875
}
}
},
{
"ex_id": "aqua-test-80",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.5,
"scores": {
"A": -10.703125,
"B": -9.203125,
"C": -10.328125,
"D": -10.4375,
"E": -11.28125
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 1.1484375,
"scores": {
"A": -6.0,
"B": -7.1484375,
"C": -8.6328125,
"D": -7.5234375,
"E": -7.953125
}
}
},
{
"ex_id": "aqua-test-81",
"gold": "E",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.7265625,
"scores": {
"A": -12.1875,
"B": -11.984375,
"C": -10.6015625,
"D": -14.203125,
"E": -13.328125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.7109375,
"scores": {
"A": -10.2890625,
"B": -13.0390625,
"C": -12.3828125,
"D": -12.15625,
"E": -13.0
}
}
},
{
"ex_id": "aqua-test-82",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.78125,
"scores": {
"A": -12.3046875,
"B": -9.84375,
"C": -12.625,
"D": -14.0078125,
"E": -11.7890625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.359375,
"scores": {
"A": -6.59375,
"B": -9.5390625,
"C": -10.953125,
"D": -10.6875,
"E": -9.8828125
}
}
},
{
"ex_id": "aqua-test-83",
"gold": "D",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -1.921875,
"scores": {
"A": -10.3046875,
"B": -9.1640625,
"C": -8.2421875,
"D": -10.1640625,
"E": -9.40625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.078125,
"scores": {
"A": -5.59375,
"B": -7.6640625,
"C": -9.4296875,
"D": -8.671875,
"E": -10.1015625
}
}
},
{
"ex_id": "aqua-test-84",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.3828125,
"scores": {
"A": -11.46875,
"B": -10.9765625,
"C": -9.0859375,
"D": -9.7890625,
"E": -10.390625
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 1.09375,
"scores": {
"A": -9.6875,
"B": -11.546875,
"C": -11.609375,
"D": -10.78125,
"E": -11.5859375
}
}
},
{
"ex_id": "aqua-test-85",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.0078125,
"scores": {
"A": -11.0234375,
"B": -9.9296875,
"C": -9.015625,
"D": -13.453125,
"E": -13.5703125
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 3.6484375,
"scores": {
"A": -6.2265625,
"B": -10.890625,
"C": -9.875,
"D": -11.765625,
"E": -14.03125
}
}
},
{
"ex_id": "aqua-test-86",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.75,
"scores": {
"A": -10.234375,
"B": -8.484375,
"C": -10.546875,
"D": -12.03125,
"E": -11.203125
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 1.78125,
"scores": {
"A": -9.25,
"B": -11.03125,
"C": -12.28125,
"D": -12.1875,
"E": -11.21875
}
}
},
{
"ex_id": "aqua-test-87",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.625,
"scores": {
"A": -9.7890625,
"B": -9.1640625,
"C": -11.234375,
"D": -12.0,
"E": -11.46875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.90625,
"scores": {
"A": -6.578125,
"B": -9.484375,
"C": -9.703125,
"D": -9.1875,
"E": -11.25
}
}
},
{
"ex_id": "aqua-test-88",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.40625,
"scores": {
"A": -14.6796875,
"B": -12.328125,
"C": -13.0625,
"D": -13.9765625,
"E": -13.734375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.9375,
"scores": {
"A": -5.40625,
"B": -6.2578125,
"C": -7.46875,
"D": -9.0390625,
"E": -10.34375
}
}
},
{
"ex_id": "aqua-test-89",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.02734375,
"scores": {
"A": -10.921875,
"B": -7.89453125,
"C": -10.65625,
"D": -12.421875,
"E": -11.3125
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 1.75,
"scores": {
"A": -6.359375,
"B": -8.109375,
"C": -11.4140625,
"D": -9.7109375,
"E": -9.2890625
}
}
},
{
"ex_id": "aqua-test-90",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.5,
"scores": {
"A": -11.0234375,
"B": -10.5234375,
"C": -12.0234375,
"D": -12.125,
"E": -13.828125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.6015625,
"scores": {
"A": -8.1796875,
"B": -9.109375,
"C": -10.78125,
"D": -9.5625,
"E": -10.078125
}
}
},
{
"ex_id": "aqua-test-91",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.390625,
"scores": {
"A": -12.015625,
"B": -10.046875,
"C": -11.4375,
"D": -12.140625,
"E": -10.671875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.6328125,
"scores": {
"A": -8.03125,
"B": -8.96875,
"C": -9.6640625,
"D": -9.765625,
"E": -9.015625
}
}
},
{
"ex_id": "aqua-test-92",
"gold": "B",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -0.359375,
"scores": {
"A": -12.046875,
"B": -12.40625,
"C": -12.75,
"D": -14.078125,
"E": -14.9375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.984375,
"scores": {
"A": -11.0234375,
"B": -15.0078125,
"C": -15.1640625,
"D": -13.609375,
"E": -17.765625
}
}
},
{
"ex_id": "aqua-test-93",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.84375,
"scores": {
"A": -13.046875,
"B": -9.09375,
"C": -9.984375,
"D": -10.78125,
"E": -10.9375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.7421875,
"scores": {
"A": -8.2421875,
"B": -12.109375,
"C": -15.703125,
"D": -13.6875,
"E": -14.984375
}
}
},
{
"ex_id": "aqua-test-94",
"gold": "E",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -1.921875,
"scores": {
"A": -12.7265625,
"B": -10.6796875,
"C": -10.0390625,
"D": -13.703125,
"E": -11.9609375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -5.5859375,
"scores": {
"A": -5.8046875,
"B": -6.28125,
"C": -9.3125,
"D": -10.296875,
"E": -11.390625
}
}
},
{
"ex_id": "aqua-test-95",
"gold": "E",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -3.9375,
"scores": {
"A": -11.21875,
"B": -9.1015625,
"C": -8.0234375,
"D": -12.296875,
"E": -11.9609375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.4609375,
"scores": {
"A": -8.203125,
"B": -9.6015625,
"C": -8.6171875,
"D": -9.53125,
"E": -9.6640625
}
}
},
{
"ex_id": "aqua-test-96",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -4.328125,
"scores": {
"A": -9.84375,
"B": -9.140625,
"C": -13.0625,
"D": -11.453125,
"E": -13.46875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.37890625,
"scores": {
"A": -7.28515625,
"B": -8.671875,
"C": -13.0,
"D": -10.65625,
"E": -11.6640625
}
}
},
{
"ex_id": "aqua-test-97",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.828125,
"scores": {
"A": -14.46875,
"B": -12.09375,
"C": -13.828125,
"D": -14.921875,
"E": -13.8125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.6328125,
"scores": {
"A": -10.09375,
"B": -10.359375,
"C": -13.9140625,
"D": -14.7265625,
"E": -14.140625
}
}
},
{
"ex_id": "aqua-test-98",
"gold": "D",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.0078125,
"scores": {
"A": -12.9921875,
"B": -10.796875,
"C": -10.578125,
"D": -12.5859375,
"E": -11.890625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.44140625,
"scores": {
"A": -4.04296875,
"B": -8.203125,
"C": -10.59375,
"D": -10.484375,
"E": -12.0390625
}
}
},
{
"ex_id": "aqua-test-99",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.703125,
"scores": {
"A": -10.875,
"B": -9.203125,
"C": -9.609375,
"D": -11.328125,
"E": -10.90625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -7.5859375,
"scores": {
"A": -7.1484375,
"B": -8.3359375,
"C": -12.75,
"D": -12.609375,
"E": -14.734375
}
}
},
{
"ex_id": "aqua-test-100",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.1484375,
"scores": {
"A": -9.265625,
"B": -9.7265625,
"C": -9.1171875,
"D": -10.0546875,
"E": -10.6015625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.16015625,
"scores": {
"A": -4.76171875,
"B": -9.390625,
"C": -10.921875,
"D": -11.46875,
"E": -13.1875
}
}
},
{
"ex_id": "aqua-test-101",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 0.3359375,
"scores": {
"A": -10.09375,
"B": -10.4296875,
"C": -11.984375,
"D": -10.6015625,
"E": -12.453125
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 4.4921875,
"scores": {
"A": -7.59375,
"B": -12.0859375,
"C": -13.8125,
"D": -12.8125,
"E": -14.53125
}
}
},
{
"ex_id": "aqua-test-102",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -4.4609375,
"scores": {
"A": -11.3046875,
"B": -10.4296875,
"C": -10.671875,
"D": -14.109375,
"E": -14.890625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -7.984375,
"scores": {
"A": -6.84375,
"B": -10.515625,
"C": -13.328125,
"D": -13.984375,
"E": -14.828125
}
}
},
{
"ex_id": "aqua-test-103",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.1484375,
"scores": {
"A": -9.734375,
"B": -8.5234375,
"C": -9.6875,
"D": -11.4375,
"E": -9.671875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -0.75,
"scores": {
"A": -7.84375,
"B": -8.59375,
"C": -10.53125,
"D": -9.78125,
"E": -8.421875
}
}
},
{
"ex_id": "aqua-test-104",
"gold": "D",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -3.3671875,
"scores": {
"A": -14.203125,
"B": -11.3671875,
"C": -11.15625,
"D": -14.5234375,
"E": -15.984375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.8984375,
"scores": {
"A": -8.5859375,
"B": -10.609375,
"C": -11.203125,
"D": -11.484375,
"E": -10.953125
}
}
},
{
"ex_id": "aqua-test-105",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 3.03125,
"scores": {
"A": -11.5,
"B": -12.0234375,
"C": -8.46875,
"D": -13.9765625,
"E": -13.28125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.12109375,
"scores": {
"A": -7.45703125,
"B": -9.78125,
"C": -8.578125,
"D": -13.375,
"E": -13.59375
}
}
},
{
"ex_id": "aqua-test-106",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.9296875,
"scores": {
"A": -10.109375,
"B": -9.1796875,
"C": -9.4453125,
"D": -13.0234375,
"E": -13.1328125
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 3.88671875,
"scores": {
"A": -7.59765625,
"B": -11.9453125,
"C": -13.4375,
"D": -11.484375,
"E": -14.0703125
}
}
},
{
"ex_id": "aqua-test-107",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.015625,
"scores": {
"A": -9.90625,
"B": -8.890625,
"C": -11.015625,
"D": -12.1875,
"E": -10.1484375
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 3.1640625,
"scores": {
"A": -7.0546875,
"B": -10.21875,
"C": -11.671875,
"D": -10.859375,
"E": -11.921875
}
}
},
{
"ex_id": "aqua-test-108",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.65625,
"scores": {
"A": -12.28125,
"B": -9.625,
"C": -12.828125,
"D": -13.28125,
"E": -14.59375
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 1.875,
"scores": {
"A": -6.484375,
"B": -8.359375,
"C": -11.03125,
"D": -9.9921875,
"E": -11.953125
}
}
},
{
"ex_id": "aqua-test-109",
"gold": "C",
"baseline": {
"pred_label": "E",
"correct": false,
"margin": -0.8203125,
"scores": {
"A": -10.0546875,
"B": -11.3125,
"C": -10.7734375,
"D": -12.453125,
"E": -9.953125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.171875,
"scores": {
"A": -8.9375,
"B": -13.0625,
"C": -15.109375,
"D": -13.6875,
"E": -14.53125
}
}
},
{
"ex_id": "aqua-test-110",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.28125,
"scores": {
"A": -12.046875,
"B": -8.4375,
"C": -11.71875,
"D": -12.578125,
"E": -15.1328125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -5.609375,
"scores": {
"A": -5.6796875,
"B": -9.0703125,
"C": -11.2890625,
"D": -11.3984375,
"E": -13.359375
}
}
},
{
"ex_id": "aqua-test-111",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.203125,
"scores": {
"A": -9.796875,
"B": -9.2734375,
"C": -9.4765625,
"D": -10.7578125,
"E": -11.4296875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.6328125,
"scores": {
"A": -8.4140625,
"B": -12.046875,
"C": -12.484375,
"D": -13.3125,
"E": -14.53125
}
}
},
{
"ex_id": "aqua-test-112",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -5.109375,
"scores": {
"A": -16.6875,
"B": -16.21875,
"C": -11.578125,
"D": -13.8203125,
"E": -15.546875
}
},
"ablated_1": {
"pred_label": "D",
"correct": false,
"margin": -0.0859375,
"scores": {
"A": -11.0078125,
"B": -13.3203125,
"C": -11.015625,
"D": -10.921875,
"E": -15.7734375
}
}
},
{
"ex_id": "aqua-test-113",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.359375,
"scores": {
"A": -10.40625,
"B": -9.71875,
"C": -10.078125,
"D": -12.84375,
"E": -11.2578125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -7.25390625,
"scores": {
"A": -5.61328125,
"B": -11.015625,
"C": -12.8671875,
"D": -11.5625,
"E": -13.203125
}
}
},
{
"ex_id": "aqua-test-114",
"gold": "B",
"baseline": {
"pred_label": "E",
"correct": false,
"margin": -0.203125,
"scores": {
"A": -12.5234375,
"B": -11.9609375,
"C": -12.390625,
"D": -12.65625,
"E": -11.7578125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.890625,
"scores": {
"A": -5.28125,
"B": -10.171875,
"C": -12.328125,
"D": -10.734375,
"E": -12.53125
}
}
},
{
"ex_id": "aqua-test-115",
"gold": "E",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -1.28125,
"scores": {
"A": -10.9375,
"B": -11.484375,
"C": -11.296875,
"D": -12.328125,
"E": -12.21875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.3515625,
"scores": {
"A": -9.8984375,
"B": -12.703125,
"C": -13.203125,
"D": -11.328125,
"E": -11.25
}
}
},
{
"ex_id": "aqua-test-116",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.7109375,
"scores": {
"A": -12.0390625,
"B": -9.7421875,
"C": -11.453125,
"D": -11.5390625,
"E": -11.8203125
}
},
"ablated_1": {
"pred_label": "D",
"correct": false,
"margin": -0.51171875,
"scores": {
"A": -7.28125,
"B": -7.4921875,
"C": -8.8203125,
"D": -6.98046875,
"E": -7.3046875
}
}
},
{
"ex_id": "aqua-test-117",
"gold": "E",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.203125,
"scores": {
"A": -11.703125,
"B": -9.125,
"C": -9.078125,
"D": -11.03125,
"E": -11.28125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.0234375,
"scores": {
"A": -6.42578125,
"B": -6.91015625,
"C": -7.69921875,
"D": -8.4375,
"E": -7.44921875
}
}
},
{
"ex_id": "aqua-test-118",
"gold": "C",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -0.34375,
"scores": {
"A": -12.6953125,
"B": -14.203125,
"C": -13.0390625,
"D": -14.6640625,
"E": -13.8125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.234375,
"scores": {
"A": -7.921875,
"B": -10.390625,
"C": -10.15625,
"D": -11.53125,
"E": -11.34375
}
}
},
{
"ex_id": "aqua-test-119",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.921875,
"scores": {
"A": -12.75,
"B": -11.296875,
"C": -13.4296875,
"D": -13.21875,
"E": -12.75
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.125,
"scores": {
"A": -6.8046875,
"B": -7.6171875,
"C": -10.0625,
"D": -10.9296875,
"E": -11.828125
}
}
},
{
"ex_id": "aqua-test-120",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.3515625,
"scores": {
"A": -12.625,
"B": -10.171875,
"C": -10.5234375,
"D": -11.96875,
"E": -12.625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.84375,
"scores": {
"A": -7.4375,
"B": -12.28125,
"C": -11.75,
"D": -11.984375,
"E": -13.75
}
}
},
{
"ex_id": "aqua-test-121",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.6875,
"scores": {
"A": -12.203125,
"B": -10.84375,
"C": -11.015625,
"D": -13.53125,
"E": -13.328125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.828125,
"scores": {
"A": -7.1875,
"B": -9.34375,
"C": -10.5703125,
"D": -12.015625,
"E": -11.6640625
}
}
},
{
"ex_id": "aqua-test-122",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.671875,
"scores": {
"A": -11.09375,
"B": -10.421875,
"C": -13.25,
"D": -13.296875,
"E": -13.5
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.328125,
"scores": {
"A": -6.90625,
"B": -9.234375,
"C": -12.421875,
"D": -11.0,
"E": -12.1796875
}
}
},
{
"ex_id": "aqua-test-123",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 1.9296875,
"scores": {
"A": -12.8125,
"B": -13.265625,
"C": -10.09375,
"D": -12.0234375,
"E": -12.84375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.0625,
"scores": {
"A": -7.71875,
"B": -10.9296875,
"C": -11.78125,
"D": -11.5546875,
"E": -13.9375
}
}
},
{
"ex_id": "aqua-test-124",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.96875,
"scores": {
"A": -10.953125,
"B": -9.9296875,
"C": -10.8984375,
"D": -11.1875,
"E": -11.4375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.4140625,
"scores": {
"A": -9.1328125,
"B": -11.734375,
"C": -11.546875,
"D": -11.625,
"E": -11.328125
}
}
},
{
"ex_id": "aqua-test-125",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.25,
"scores": {
"A": -12.8984375,
"B": -12.015625,
"C": -10.3671875,
"D": -10.6171875,
"E": -11.515625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.16796875,
"scores": {
"A": -6.90234375,
"B": -8.6875,
"C": -10.0703125,
"D": -9.84375,
"E": -10.1640625
}
}
},
{
"ex_id": "aqua-test-126",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.546875,
"scores": {
"A": -11.015625,
"B": -8.3515625,
"C": -8.8984375,
"D": -9.953125,
"E": -10.5
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -5.8671875,
"scores": {
"A": -5.8046875,
"B": -10.6484375,
"C": -11.671875,
"D": -11.6796875,
"E": -13.015625
}
}
},
{
"ex_id": "aqua-test-127",
"gold": "A",
"baseline": {
"pred_label": "D",
"correct": false,
"margin": -0.0625,
"scores": {
"A": -10.5546875,
"B": -10.7734375,
"C": -10.859375,
"D": -10.4921875,
"E": -11.0703125
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 1.21875,
"scores": {
"A": -6.5,
"B": -8.7109375,
"C": -9.375,
"D": -7.71875,
"E": -8.375
}
}
},
{
"ex_id": "aqua-test-128",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.0078125,
"scores": {
"A": -10.7890625,
"B": -10.34375,
"C": -10.8359375,
"D": -13.4609375,
"E": -12.3515625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.97265625,
"scores": {
"A": -5.43359375,
"B": -7.93359375,
"C": -9.234375,
"D": -11.1875,
"E": -10.40625
}
}
},
{
"ex_id": "aqua-test-129",
"gold": "D",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -2.609375,
"scores": {
"A": -8.3125,
"B": -10.671875,
"C": -9.640625,
"D": -10.921875,
"E": -9.640625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.0390625,
"scores": {
"A": -6.0390625,
"B": -8.234375,
"C": -7.6875,
"D": -8.078125,
"E": -8.28125
}
}
},
{
"ex_id": "aqua-test-130",
"gold": "D",
"baseline": {
"pred_label": "D",
"correct": true,
"margin": 0.5625,
"scores": {
"A": -12.53125,
"B": -11.625,
"C": -14.40625,
"D": -11.0625,
"E": -12.203125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -0.62890625,
"scores": {
"A": -7.70703125,
"B": -7.73046875,
"C": -10.4296875,
"D": -8.3359375,
"E": -8.21875
}
}
},
{
"ex_id": "aqua-test-131",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 3.3046875,
"scores": {
"A": -14.7265625,
"B": -9.6171875,
"C": -12.921875,
"D": -13.8828125,
"E": -14.4765625
}
},
"ablated_1": {
"pred_label": "B",
"correct": true,
"margin": 0.75,
"scores": {
"A": -9.296875,
"B": -8.546875,
"C": -11.984375,
"D": -11.671875,
"E": -12.25
}
}
},
{
"ex_id": "aqua-test-132",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.1640625,
"scores": {
"A": -12.734375,
"B": -9.8671875,
"C": -11.4375,
"D": -11.078125,
"E": -13.03125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.515625,
"scores": {
"A": -9.109375,
"B": -9.109375,
"C": -10.546875,
"D": -10.34375,
"E": -10.625
}
}
},
{
"ex_id": "aqua-test-133",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -4.1015625,
"scores": {
"A": -14.859375,
"B": -16.15625,
"C": -10.7578125,
"D": -14.6015625,
"E": -13.9765625
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 3.8203125,
"scores": {
"A": -8.59375,
"B": -13.015625,
"C": -12.4140625,
"D": -14.5625,
"E": -14.453125
}
}
},
{
"ex_id": "aqua-test-134",
"gold": "D",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -2.46875,
"scores": {
"A": -10.7109375,
"B": -11.8984375,
"C": -13.1484375,
"D": -13.1796875,
"E": -13.21875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -5.0,
"scores": {
"A": -7.7265625,
"B": -10.8125,
"C": -11.3671875,
"D": -12.7265625,
"E": -14.625
}
}
},
{
"ex_id": "aqua-test-135",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.515625,
"scores": {
"A": -13.609375,
"B": -10.125,
"C": -10.8984375,
"D": -13.640625,
"E": -13.96875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.02734375,
"scores": {
"A": -7.17578125,
"B": -8.53125,
"C": -9.078125,
"D": -9.203125,
"E": -10.515625
}
}
},
{
"ex_id": "aqua-test-136",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.6875,
"scores": {
"A": -12.5546875,
"B": -11.8671875,
"C": -11.921875,
"D": -14.296875,
"E": -14.3984375
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 1.875,
"scores": {
"A": -10.15625,
"B": -12.46875,
"C": -12.46875,
"D": -12.03125,
"E": -13.0703125
}
}
},
{
"ex_id": "aqua-test-137",
"gold": "B",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -0.421875,
"scores": {
"A": -10.4375,
"B": -10.5078125,
"C": -10.0859375,
"D": -12.4375,
"E": -12.859375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.3125,
"scores": {
"A": -9.8125,
"B": -13.125,
"C": -11.859375,
"D": -12.8671875,
"E": -13.4140625
}
}
},
{
"ex_id": "aqua-test-138",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.1328125,
"scores": {
"A": -11.515625,
"B": -11.3828125,
"C": -13.8515625,
"D": -13.59375,
"E": -12.3359375
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 2.3984375,
"scores": {
"A": -9.328125,
"B": -11.9453125,
"C": -13.375,
"D": -12.328125,
"E": -11.7265625
}
}
},
{
"ex_id": "aqua-test-139",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -4.9921875,
"scores": {
"A": -11.2421875,
"B": -9.6640625,
"C": -11.1796875,
"D": -12.578125,
"E": -14.65625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.05859375,
"scores": {
"A": -7.59765625,
"B": -9.390625,
"C": -10.296875,
"D": -9.796875,
"E": -11.65625
}
}
},
{
"ex_id": "aqua-test-140",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.09375,
"scores": {
"A": -12.171875,
"B": -10.953125,
"C": -12.484375,
"D": -12.046875,
"E": -12.828125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.09375,
"scores": {
"A": -8.640625,
"B": -9.734375,
"C": -10.828125,
"D": -10.234375,
"E": -11.625
}
}
},
{
"ex_id": "aqua-test-141",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.390625,
"scores": {
"A": -15.65625,
"B": -14.0,
"C": -12.3359375,
"D": -12.7265625,
"E": -13.421875
}
},
"ablated_1": {
"pred_label": "E",
"correct": false,
"margin": -1.5078125,
"scores": {
"A": -9.03125,
"B": -10.1640625,
"C": -9.59375,
"D": -8.109375,
"E": -8.0859375
}
}
},
{
"ex_id": "aqua-test-142",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 0.0390625,
"scores": {
"A": -12.0546875,
"B": -12.09375,
"C": -12.2421875,
"D": -13.71875,
"E": -13.640625
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 2.3515625,
"scores": {
"A": -10.3203125,
"B": -13.515625,
"C": -12.671875,
"D": -13.125,
"E": -14.1875
}
}
},
{
"ex_id": "aqua-test-143",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.2109375,
"scores": {
"A": -10.8515625,
"B": -9.3515625,
"C": -10.453125,
"D": -10.5625,
"E": -10.328125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.875,
"scores": {
"A": -8.78125,
"B": -11.7734375,
"C": -12.0390625,
"D": -10.65625,
"E": -11.0703125
}
}
},
{
"ex_id": "aqua-test-144",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.5234375,
"scores": {
"A": -14.15625,
"B": -13.6328125,
"C": -16.546875,
"D": -16.75,
"E": -16.03125
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 7.6953125,
"scores": {
"A": -6.4453125,
"B": -14.140625,
"C": -17.203125,
"D": -14.1953125,
"E": -15.3984375
}
}
},
{
"ex_id": "aqua-test-145",
"gold": "B",
"baseline": {
"pred_label": "E",
"correct": false,
"margin": -0.125,
"scores": {
"A": -13.59375,
"B": -11.84375,
"C": -14.28125,
"D": -13.25,
"E": -11.71875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.28125,
"scores": {
"A": -8.40625,
"B": -11.6875,
"C": -15.328125,
"D": -13.34375,
"E": -12.859375
}
}
},
{
"ex_id": "aqua-test-146",
"gold": "B",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -1.6953125,
"scores": {
"A": -9.765625,
"B": -11.140625,
"C": -9.4453125,
"D": -10.703125,
"E": -9.9375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.390625,
"scores": {
"A": -8.3828125,
"B": -10.7734375,
"C": -14.0390625,
"D": -13.5546875,
"E": -13.0390625
}
}
},
{
"ex_id": "aqua-test-147",
"gold": "E",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -0.1484375,
"scores": {
"A": -9.875,
"B": -10.65625,
"C": -11.03125,
"D": -12.515625,
"E": -10.0234375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.203125,
"scores": {
"A": -6.390625,
"B": -8.140625,
"C": -9.0703125,
"D": -10.21875,
"E": -8.59375
}
}
},
{
"ex_id": "aqua-test-148",
"gold": "D",
"baseline": {
"pred_label": "D",
"correct": true,
"margin": 0.09375,
"scores": {
"A": -11.84375,
"B": -8.6875,
"C": -9.390625,
"D": -8.59375,
"E": -10.328125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -0.2890625,
"scores": {
"A": -8.21875,
"B": -9.21875,
"C": -9.4140625,
"D": -8.5078125,
"E": -9.5859375
}
}
},
{
"ex_id": "aqua-test-149",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.984375,
"scores": {
"A": -11.984375,
"B": -10.328125,
"C": -11.265625,
"D": -11.3125,
"E": -11.625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.5078125,
"scores": {
"A": -7.390625,
"B": -10.25,
"C": -13.9375,
"D": -13.8984375,
"E": -15.1875
}
}
},
{
"ex_id": "aqua-test-150",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.9140625,
"scores": {
"A": -12.328125,
"B": -9.484375,
"C": -10.9921875,
"D": -12.3984375,
"E": -12.6328125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -5.38671875,
"scores": {
"A": -7.01953125,
"B": -12.03125,
"C": -12.453125,
"D": -12.40625,
"E": -13.796875
}
}
},
{
"ex_id": "aqua-test-151",
"gold": "C",
"baseline": {
"pred_label": "E",
"correct": false,
"margin": -2.625,
"scores": {
"A": -15.140625,
"B": -13.625,
"C": -14.5625,
"D": -15.296875,
"E": -11.9375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.1484375,
"scores": {
"A": -10.265625,
"B": -11.4375,
"C": -13.4140625,
"D": -11.515625,
"E": -12.484375
}
}
},
{
"ex_id": "aqua-test-152",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.65625,
"scores": {
"A": -12.15625,
"B": -11.09375,
"C": -11.75,
"D": -11.765625,
"E": -11.75
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.6015625,
"scores": {
"A": -8.2734375,
"B": -12.875,
"C": -16.15625,
"D": -13.109375,
"E": -13.90625
}
}
},
{
"ex_id": "aqua-test-153",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 0.46875,
"scores": {
"A": -10.4609375,
"B": -10.9296875,
"C": -12.1640625,
"D": -15.109375,
"E": -13.3125
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 3.375,
"scores": {
"A": -5.125,
"B": -8.75,
"C": -8.5,
"D": -12.921875,
"E": -12.109375
}
}
},
{
"ex_id": "aqua-test-154",
"gold": "D",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -4.9921875,
"scores": {
"A": -10.4296875,
"B": -11.890625,
"C": -11.7890625,
"D": -15.421875,
"E": -13.53125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.6875,
"scores": {
"A": -6.578125,
"B": -9.6484375,
"C": -11.0078125,
"D": -11.265625,
"E": -11.0859375
}
}
},
{
"ex_id": "aqua-test-155",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -3.25,
"scores": {
"A": -11.7578125,
"B": -10.15625,
"C": -8.5078125,
"D": -12.3046875,
"E": -12.296875
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 1.6953125,
"scores": {
"A": -6.5625,
"B": -9.1015625,
"C": -8.2578125,
"D": -9.484375,
"E": -8.8671875
}
}
},
{
"ex_id": "aqua-test-156",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.546875,
"scores": {
"A": -11.046875,
"B": -9.5,
"C": -10.5390625,
"D": -10.7265625,
"E": -10.1328125
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 0.546875,
"scores": {
"A": -8.578125,
"B": -9.53125,
"C": -10.578125,
"D": -9.21875,
"E": -9.125
}
}
},
{
"ex_id": "aqua-test-157",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.28125,
"scores": {
"A": -10.46875,
"B": -8.765625,
"C": -9.046875,
"D": -9.90625,
"E": -10.0625
}
},
"ablated_1": {
"pred_label": "E",
"correct": false,
"margin": -1.28125,
"scores": {
"A": -7.140625,
"B": -8.390625,
"C": -8.2734375,
"D": -7.359375,
"E": -6.9921875
}
}
},
{
"ex_id": "aqua-test-158",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.6171875,
"scores": {
"A": -11.875,
"B": -9.8671875,
"C": -12.375,
"D": -12.484375,
"E": -12.4375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -0.828125,
"scores": {
"A": -7.9453125,
"B": -10.96875,
"C": -9.1328125,
"D": -8.7734375,
"E": -9.625
}
}
},
{
"ex_id": "aqua-test-159",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.203125,
"scores": {
"A": -14.390625,
"B": -10.96875,
"C": -12.171875,
"D": -14.21875,
"E": -13.125
}
},
"ablated_1": {
"pred_label": "D",
"correct": false,
"margin": -1.5625,
"scores": {
"A": -10.09375,
"B": -10.09375,
"C": -11.546875,
"D": -9.984375,
"E": -10.1171875
}
}
},
{
"ex_id": "aqua-test-160",
"gold": "D",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -2.859375,
"scores": {
"A": -11.5859375,
"B": -14.2109375,
"C": -14.4609375,
"D": -14.4453125,
"E": -14.359375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.6953125,
"scores": {
"A": -7.6484375,
"B": -11.265625,
"C": -12.71875,
"D": -12.34375,
"E": -12.890625
}
}
},
{
"ex_id": "aqua-test-161",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.1953125,
"scores": {
"A": -14.890625,
"B": -10.3125,
"C": -12.5078125,
"D": -12.6953125,
"E": -15.1796875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.21875,
"scores": {
"A": -6.8515625,
"B": -7.0078125,
"C": -9.0703125,
"D": -9.6484375,
"E": -9.453125
}
}
},
{
"ex_id": "aqua-test-162",
"gold": "C",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -2.234375,
"scores": {
"A": -10.1484375,
"B": -10.5,
"C": -12.3828125,
"D": -11.859375,
"E": -11.9921875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.578125,
"scores": {
"A": -5.546875,
"B": -10.59375,
"C": -10.125,
"D": -10.140625,
"E": -11.59375
}
}
},
{
"ex_id": "aqua-test-163",
"gold": "C",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -1.34375,
"scores": {
"A": -10.15625,
"B": -10.65625,
"C": -11.5,
"D": -14.0625,
"E": -11.96875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.609375,
"scores": {
"A": -7.6796875,
"B": -11.90625,
"C": -12.2890625,
"D": -12.3671875,
"E": -10.6640625
}
}
},
{
"ex_id": "aqua-test-164",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.0234375,
"scores": {
"A": -11.75,
"B": -11.3671875,
"C": -12.578125,
"D": -14.390625,
"E": -13.609375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.5,
"scores": {
"A": -10.015625,
"B": -13.3125,
"C": -14.359375,
"D": -12.515625,
"E": -13.640625
}
}
},
{
"ex_id": "aqua-test-165",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.390625,
"scores": {
"A": -13.828125,
"B": -11.390625,
"C": -14.1875,
"D": -12.78125,
"E": -14.859375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.03125,
"scores": {
"A": -7.125,
"B": -8.75,
"C": -13.1328125,
"D": -13.15625,
"E": -14.8671875
}
}
},
{
"ex_id": "aqua-test-166",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.734375,
"scores": {
"A": -13.2265625,
"B": -8.90625,
"C": -9.71875,
"D": -10.8046875,
"E": -11.640625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -5.4296875,
"scores": {
"A": -6.2421875,
"B": -8.828125,
"C": -9.09375,
"D": -9.59375,
"E": -11.671875
}
}
},
{
"ex_id": "aqua-test-167",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 2.28125,
"scores": {
"A": -13.3125,
"B": -10.640625,
"C": -12.921875,
"D": -16.09375,
"E": -14.75
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.328125,
"scores": {
"A": -8.015625,
"B": -11.34375,
"C": -15.125,
"D": -13.3125,
"E": -14.375
}
}
},
{
"ex_id": "aqua-test-168",
"gold": "D",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -1.59375,
"scores": {
"A": -11.9375,
"B": -12.078125,
"C": -12.59375,
"D": -13.53125,
"E": -13.71875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.8125,
"scores": {
"A": -8.265625,
"B": -12.84375,
"C": -14.609375,
"D": -13.078125,
"E": -14.4375
}
}
},
{
"ex_id": "aqua-test-169",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.265625,
"scores": {
"A": -11.6953125,
"B": -9.4296875,
"C": -11.6328125,
"D": -13.53125,
"E": -12.4609375
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 1.140625,
"scores": {
"A": -9.15625,
"B": -10.296875,
"C": -10.46875,
"D": -11.28125,
"E": -11.203125
}
}
},
{
"ex_id": "aqua-test-170",
"gold": "E",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -1.6953125,
"scores": {
"A": -12.6796875,
"B": -13.546875,
"C": -15.734375,
"D": -14.1875,
"E": -14.375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.3984375,
"scores": {
"A": -7.4453125,
"B": -11.609375,
"C": -12.671875,
"D": -10.328125,
"E": -10.84375
}
}
},
{
"ex_id": "aqua-test-171",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.3203125,
"scores": {
"A": -12.734375,
"B": -11.4140625,
"C": -12.1015625,
"D": -14.3125,
"E": -13.2265625
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 3.89453125,
"scores": {
"A": -7.27734375,
"B": -11.171875,
"C": -14.109375,
"D": -13.875,
"E": -14.140625
}
}
},
{
"ex_id": "aqua-test-172",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.5546875,
"scores": {
"A": -11.8984375,
"B": -9.34375,
"C": -9.6796875,
"D": -12.5859375,
"E": -11.3515625
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 0.265625,
"scores": {
"A": -7.75,
"B": -8.015625,
"C": -8.984375,
"D": -10.234375,
"E": -8.7734375
}
}
},
{
"ex_id": "aqua-test-173",
"gold": "D",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -4.578125,
"scores": {
"A": -9.578125,
"B": -13.25,
"C": -13.359375,
"D": -14.15625,
"E": -12.0625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -7.5,
"scores": {
"A": -7.09375,
"B": -12.1875,
"C": -14.15625,
"D": -14.59375,
"E": -14.734375
}
}
},
{
"ex_id": "aqua-test-174",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.25,
"scores": {
"A": -14.1875,
"B": -13.484375,
"C": -14.6875,
"D": -13.5625,
"E": -13.734375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -5.984375,
"scores": {
"A": -8.5,
"B": -11.28125,
"C": -14.546875,
"D": -12.5625,
"E": -14.484375
}
}
},
{
"ex_id": "aqua-test-175",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.3828125,
"scores": {
"A": -12.578125,
"B": -11.2734375,
"C": -12.71875,
"D": -13.6171875,
"E": -12.65625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.2109375,
"scores": {
"A": -7.8359375,
"B": -12.359375,
"C": -15.453125,
"D": -11.6484375,
"E": -14.046875
}
}
},
{
"ex_id": "aqua-test-176",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.796875,
"scores": {
"A": -10.96875,
"B": -10.25,
"C": -13.046875,
"D": -13.21875,
"E": -11.5625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.046875,
"scores": {
"A": -8.546875,
"B": -10.9375,
"C": -12.59375,
"D": -10.203125,
"E": -10.328125
}
}
},
{
"ex_id": "aqua-test-177",
"gold": "C",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -4.09375,
"scores": {
"A": -9.609375,
"B": -9.890625,
"C": -13.703125,
"D": -11.875,
"E": -11.5
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.8359375,
"scores": {
"A": -5.1953125,
"B": -7.3671875,
"C": -12.03125,
"D": -9.5625,
"E": -11.109375
}
}
},
{
"ex_id": "aqua-test-178",
"gold": "E",
"baseline": {
"pred_label": "E",
"correct": true,
"margin": 0.4609375,
"scores": {
"A": -13.1953125,
"B": -11.1015625,
"C": -12.8203125,
"D": -12.625,
"E": -10.640625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.72265625,
"scores": {
"A": -5.77734375,
"B": -10.375,
"C": -8.5,
"D": -10.84375,
"E": -12.5
}
}
},
{
"ex_id": "aqua-test-179",
"gold": "B",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -1.5859375,
"scores": {
"A": -9.1640625,
"B": -10.75,
"C": -11.7734375,
"D": -12.671875,
"E": -12.8046875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.15625,
"scores": {
"A": -5.9375,
"B": -8.09375,
"C": -10.34375,
"D": -10.609375,
"E": -12.296875
}
}
},
{
"ex_id": "aqua-test-180",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.5078125,
"scores": {
"A": -11.15625,
"B": -9.8515625,
"C": -12.359375,
"D": -13.1796875,
"E": -12.3828125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.53125,
"scores": {
"A": -10.609375,
"B": -11.90625,
"C": -12.140625,
"D": -11.0,
"E": -11.59375
}
}
},
{
"ex_id": "aqua-test-181",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.7734375,
"scores": {
"A": -9.6328125,
"B": -8.859375,
"C": -11.828125,
"D": -11.640625,
"E": -10.6875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.5078125,
"scores": {
"A": -7.5546875,
"B": -9.0625,
"C": -10.4453125,
"D": -9.140625,
"E": -9.0078125
}
}
},
{
"ex_id": "aqua-test-182",
"gold": "C",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -3.8203125,
"scores": {
"A": -11.703125,
"B": -15.640625,
"C": -15.5234375,
"D": -15.2578125,
"E": -15.8046875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.4765625,
"scores": {
"A": -7.3046875,
"B": -11.609375,
"C": -13.78125,
"D": -14.828125,
"E": -15.296875
}
}
},
{
"ex_id": "aqua-test-183",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.40625,
"scores": {
"A": -12.6484375,
"B": -9.3671875,
"C": -10.7734375,
"D": -13.140625,
"E": -13.125
}
},
"ablated_1": {
"pred_label": "C",
"correct": false,
"margin": -0.71875,
"scores": {
"A": -8.6640625,
"B": -7.5078125,
"C": -6.7890625,
"D": -9.3046875,
"E": -10.7578125
}
}
},
{
"ex_id": "aqua-test-184",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.1875,
"scores": {
"A": -11.359375,
"B": -9.9375,
"C": -11.1875,
"D": -13.125,
"E": -12.328125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.0703125,
"scores": {
"A": -8.7109375,
"B": -11.3828125,
"C": -11.5,
"D": -10.78125,
"E": -11.2578125
}
}
},
{
"ex_id": "aqua-test-185",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.1796875,
"scores": {
"A": -11.375,
"B": -9.40625,
"C": -12.5859375,
"D": -12.453125,
"E": -12.6484375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -8.36328125,
"scores": {
"A": -5.95703125,
"B": -12.796875,
"C": -14.3203125,
"D": -13.2578125,
"E": -14.75
}
}
},
{
"ex_id": "aqua-test-186",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 0.40625,
"scores": {
"A": -11.265625,
"B": -11.671875,
"C": -14.078125,
"D": -14.6875,
"E": -13.640625
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 2.421875,
"scores": {
"A": -7.59375,
"B": -10.015625,
"C": -13.9375,
"D": -12.765625,
"E": -12.796875
}
}
},
{
"ex_id": "aqua-test-187",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.078125,
"scores": {
"A": -12.546875,
"B": -11.0,
"C": -13.078125,
"D": -14.6171875,
"E": -14.2265625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.29296875,
"scores": {
"A": -7.48828125,
"B": -9.2890625,
"C": -10.78125,
"D": -11.15625,
"E": -11.203125
}
}
},
{
"ex_id": "aqua-test-188",
"gold": "E",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -3.125,
"scores": {
"A": -12.140625,
"B": -13.03125,
"C": -13.734375,
"D": -14.09375,
"E": -15.265625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -5.21875,
"scores": {
"A": -11.125,
"B": -13.6875,
"C": -16.390625,
"D": -12.75,
"E": -16.34375
}
}
},
{
"ex_id": "aqua-test-189",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.2109375,
"scores": {
"A": -14.5390625,
"B": -11.546875,
"C": -11.8046875,
"D": -11.7578125,
"E": -13.34375
}
},
"ablated_1": {
"pred_label": "D",
"correct": false,
"margin": -0.84375,
"scores": {
"A": -9.875,
"B": -10.3828125,
"C": -10.28125,
"D": -9.5390625,
"E": -9.5625
}
}
},
{
"ex_id": "aqua-test-190",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.34375,
"scores": {
"A": -13.6015625,
"B": -10.7734375,
"C": -10.4296875,
"D": -13.3671875,
"E": -13.9296875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.5,
"scores": {
"A": -6.3515625,
"B": -9.2578125,
"C": -12.8515625,
"D": -10.84375,
"E": -12.953125
}
}
},
{
"ex_id": "aqua-test-191",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.953125,
"scores": {
"A": -11.890625,
"B": -10.9375,
"C": -13.640625,
"D": -14.109375,
"E": -13.765625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.09375,
"scores": {
"A": -6.4375,
"B": -8.53125,
"C": -11.7734375,
"D": -11.28125,
"E": -12.3203125
}
}
},
{
"ex_id": "aqua-test-192",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -0.125,
"scores": {
"A": -13.6953125,
"B": -13.671875,
"C": -13.5703125,
"D": -14.453125,
"E": -13.6171875
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 1.6875,
"scores": {
"A": -8.875,
"B": -12.59375,
"C": -13.15625,
"D": -11.125,
"E": -10.5625
}
}
},
{
"ex_id": "aqua-test-193",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.7734375,
"scores": {
"A": -11.515625,
"B": -8.7421875,
"C": -9.84375,
"D": -10.28125,
"E": -10.03125
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 1.2734375,
"scores": {
"A": -8.5703125,
"B": -10.296875,
"C": -10.296875,
"D": -9.84375,
"E": -9.953125
}
}
},
{
"ex_id": "aqua-test-194",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.15625,
"scores": {
"A": -9.4609375,
"B": -9.4140625,
"C": -10.640625,
"D": -11.5703125,
"E": -12.359375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.71484375,
"scores": {
"A": -7.16796875,
"B": -12.7421875,
"C": -14.03125,
"D": -13.8828125,
"E": -15.5
}
}
},
{
"ex_id": "aqua-test-195",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -4.0625,
"scores": {
"A": -11.25,
"B": -8.28125,
"C": -10.234375,
"D": -12.34375,
"E": -12.640625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.2109375,
"scores": {
"A": -8.0546875,
"B": -9.1796875,
"C": -9.2890625,
"D": -10.265625,
"E": -10.5703125
}
}
},
{
"ex_id": "aqua-test-196",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.8359375,
"scores": {
"A": -12.8125,
"B": -10.9765625,
"C": -11.9453125,
"D": -12.734375,
"E": -14.8203125
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 1.40625,
"scores": {
"A": -9.046875,
"B": -11.1953125,
"C": -11.7734375,
"D": -10.453125,
"E": -12.75
}
}
},
{
"ex_id": "aqua-test-197",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.6953125,
"scores": {
"A": -10.984375,
"B": -8.9375,
"C": -9.6328125,
"D": -9.9453125,
"E": -10.46875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.484375,
"scores": {
"A": -8.5390625,
"B": -10.0703125,
"C": -12.0234375,
"D": -8.8046875,
"E": -9.2734375
}
}
},
{
"ex_id": "aqua-test-198",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.421875,
"scores": {
"A": -11.6015625,
"B": -9.5,
"C": -9.921875,
"D": -13.40625,
"E": -11.8359375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -0.078125,
"scores": {
"A": -9.3125,
"B": -11.125,
"C": -9.390625,
"D": -11.015625,
"E": -11.4375
}
}
},
{
"ex_id": "aqua-test-199",
"gold": "C",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -5.21875,
"scores": {
"A": -10.2578125,
"B": -14.28125,
"C": -15.4765625,
"D": -16.046875,
"E": -13.0859375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -5.1484375,
"scores": {
"A": -9.6484375,
"B": -12.765625,
"C": -14.796875,
"D": -14.5625,
"E": -15.265625
}
}
},
{
"ex_id": "aqua-test-200",
"gold": "B",
"baseline": {
"pred_label": "E",
"correct": false,
"margin": -0.28125,
"scores": {
"A": -12.2578125,
"B": -11.9921875,
"C": -13.625,
"D": -12.015625,
"E": -11.7109375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.265625,
"scores": {
"A": -7.9140625,
"B": -11.1796875,
"C": -13.5546875,
"D": -9.984375,
"E": -9.2578125
}
}
},
{
"ex_id": "aqua-test-201",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.5546875,
"scores": {
"A": -10.8125,
"B": -10.2578125,
"C": -10.46875,
"D": -11.625,
"E": -12.4375
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 1.890625,
"scores": {
"A": -9.453125,
"B": -11.8671875,
"C": -13.890625,
"D": -11.34375,
"E": -14.578125
}
}
},
{
"ex_id": "aqua-test-202",
"gold": "B",
"baseline": {
"pred_label": "E",
"correct": false,
"margin": -1.6875,
"scores": {
"A": -11.5625,
"B": -11.890625,
"C": -10.765625,
"D": -11.921875,
"E": -10.203125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.4921875,
"scores": {
"A": -6.8671875,
"B": -10.359375,
"C": -13.8203125,
"D": -13.0078125,
"E": -13.609375
}
}
},
{
"ex_id": "aqua-test-203",
"gold": "D",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.9140625,
"scores": {
"A": -9.90625,
"B": -9.8125,
"C": -8.4921875,
"D": -11.40625,
"E": -10.0
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -0.9296875,
"scores": {
"A": -8.015625,
"B": -9.7578125,
"C": -10.0546875,
"D": -8.9453125,
"E": -10.2265625
}
}
},
{
"ex_id": "aqua-test-204",
"gold": "C",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -0.0625,
"scores": {
"A": -9.65625,
"B": -9.9765625,
"C": -9.71875,
"D": -11.1640625,
"E": -10.765625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.6171875,
"scores": {
"A": -5.9921875,
"B": -10.0,
"C": -10.609375,
"D": -11.453125,
"E": -11.1875
}
}
},
{
"ex_id": "aqua-test-205",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.75,
"scores": {
"A": -9.7109375,
"B": -8.2734375,
"C": -10.3828125,
"D": -11.78125,
"E": -11.0234375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.7421875,
"scores": {
"A": -7.8359375,
"B": -9.9921875,
"C": -11.4921875,
"D": -11.3125,
"E": -12.578125
}
}
},
{
"ex_id": "aqua-test-206",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.1796875,
"scores": {
"A": -11.6171875,
"B": -10.96875,
"C": -11.1484375,
"D": -12.84375,
"E": -14.0
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.03125,
"scores": {
"A": -8.0,
"B": -11.03125,
"C": -12.328125,
"D": -11.765625,
"E": -14.171875
}
}
},
{
"ex_id": "aqua-test-207",
"gold": "E",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -3.328125,
"scores": {
"A": -12.921875,
"B": -13.3125,
"C": -10.84375,
"D": -15.78125,
"E": -14.171875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -8.30078125,
"scores": {
"A": -6.63671875,
"B": -11.015625,
"C": -12.171875,
"D": -15.0703125,
"E": -14.9375
}
}
},
{
"ex_id": "aqua-test-208",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.8125,
"scores": {
"A": -12.0625,
"B": -10.109375,
"C": -11.71875,
"D": -10.921875,
"E": -13.046875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.703125,
"scores": {
"A": -9.796875,
"B": -11.140625,
"C": -13.078125,
"D": -11.5,
"E": -12.078125
}
}
},
{
"ex_id": "aqua-test-209",
"gold": "E",
"baseline": {
"pred_label": "E",
"correct": true,
"margin": 0.9453125,
"scores": {
"A": -10.890625,
"B": -9.78125,
"C": -13.171875,
"D": -10.203125,
"E": -8.8359375
}
},
"ablated_1": {
"pred_label": "E",
"correct": true,
"margin": 0.015625,
"scores": {
"A": -6.84375,
"B": -8.828125,
"C": -11.0234375,
"D": -7.9375,
"E": -6.828125
}
}
},
{
"ex_id": "aqua-test-210",
"gold": "E",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -0.546875,
"scores": {
"A": -10.8125,
"B": -10.8359375,
"C": -10.90625,
"D": -12.609375,
"E": -11.359375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -0.6875,
"scores": {
"A": -8.609375,
"B": -10.140625,
"C": -9.984375,
"D": -9.515625,
"E": -9.296875
}
}
},
{
"ex_id": "aqua-test-211",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.5234375,
"scores": {
"A": -9.2265625,
"B": -8.8125,
"C": -10.7890625,
"D": -11.3359375,
"E": -9.8671875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.296875,
"scores": {
"A": -6.6015625,
"B": -7.9453125,
"C": -10.0703125,
"D": -8.8984375,
"E": -8.984375
}
}
},
{
"ex_id": "aqua-test-212",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.5078125,
"scores": {
"A": -11.59375,
"B": -9.609375,
"C": -11.1171875,
"D": -11.7421875,
"E": -12.8359375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.390625,
"scores": {
"A": -4.984375,
"B": -8.375,
"C": -11.28125,
"D": -9.4140625,
"E": -11.890625
}
}
},
{
"ex_id": "aqua-test-213",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.734375,
"scores": {
"A": -13.40625,
"B": -12.515625,
"C": -15.25,
"D": -15.03125,
"E": -13.96875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.60546875,
"scores": {
"A": -7.22265625,
"B": -11.375,
"C": -13.828125,
"D": -12.1875,
"E": -12.0625
}
}
},
{
"ex_id": "aqua-test-214",
"gold": "E",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -3.265625,
"scores": {
"A": -10.734375,
"B": -9.25,
"C": -8.390625,
"D": -11.65625,
"E": -11.65625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.6640625,
"scores": {
"A": -7.3203125,
"B": -10.78125,
"C": -10.140625,
"D": -9.6875,
"E": -10.984375
}
}
},
{
"ex_id": "aqua-test-215",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -6.78125,
"scores": {
"A": -23.0,
"B": -18.140625,
"C": -16.21875,
"D": -22.125,
"E": -22.1875
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 0.6640625,
"scores": {
"A": -8.2734375,
"B": -9.5625,
"C": -8.9375,
"D": -11.0078125,
"E": -11.5234375
}
}
},
{
"ex_id": "aqua-test-216",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.640625,
"scores": {
"A": -12.78125,
"B": -9.7421875,
"C": -10.3828125,
"D": -12.75,
"E": -12.796875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.94140625,
"scores": {
"A": -7.49609375,
"B": -9.6796875,
"C": -10.4375,
"D": -10.078125,
"E": -10.5
}
}
},
{
"ex_id": "aqua-test-217",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.421875,
"scores": {
"A": -12.84375,
"B": -8.609375,
"C": -12.046875,
"D": -15.5625,
"E": -12.03125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.375,
"scores": {
"A": -6.671875,
"B": -7.4453125,
"C": -9.890625,
"D": -10.3359375,
"E": -9.046875
}
}
},
{
"ex_id": "aqua-test-218",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 0.453125,
"scores": {
"A": -9.890625,
"B": -10.34375,
"C": -11.4609375,
"D": -12.203125,
"E": -11.3203125
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 2.921875,
"scores": {
"A": -6.703125,
"B": -9.625,
"C": -12.21875,
"D": -12.3125,
"E": -12.875
}
}
},
{
"ex_id": "aqua-test-219",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.8359375,
"scores": {
"A": -10.765625,
"B": -10.7578125,
"C": -11.59375,
"D": -11.3125,
"E": -10.90625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -5.4140625,
"scores": {
"A": -6.5859375,
"B": -10.734375,
"C": -12.0,
"D": -12.125,
"E": -13.234375
}
}
},
{
"ex_id": "aqua-test-220",
"gold": "E",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -1.40625,
"scores": {
"A": -9.5625,
"B": -10.84375,
"C": -12.40625,
"D": -12.21875,
"E": -10.96875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -8.08203125,
"scores": {
"A": -6.37890625,
"B": -12.828125,
"C": -14.328125,
"D": -11.8828125,
"E": -14.4609375
}
}
},
{
"ex_id": "aqua-test-221",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.1875,
"scores": {
"A": -13.28125,
"B": -8.890625,
"C": -8.703125,
"D": -11.203125,
"E": -12.5625
}
},
"ablated_1": {
"pred_label": "C",
"correct": true,
"margin": 0.25,
"scores": {
"A": -7.98828125,
"B": -8.1484375,
"C": -7.68359375,
"D": -9.546875,
"E": -7.93359375
}
}
},
{
"ex_id": "aqua-test-222",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.8828125,
"scores": {
"A": -10.78125,
"B": -9.4921875,
"C": -12.015625,
"D": -13.375,
"E": -14.171875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.6484375,
"scores": {
"A": -6.34375,
"B": -8.3671875,
"C": -11.7265625,
"D": -10.9921875,
"E": -11.59375
}
}
},
{
"ex_id": "aqua-test-223",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 3.09375,
"scores": {
"A": -10.265625,
"B": -7.1484375,
"C": -10.2421875,
"D": -10.921875,
"E": -11.3359375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.09765625,
"scores": {
"A": -4.67578125,
"B": -8.7734375,
"C": -12.375,
"D": -11.1796875,
"E": -13.109375
}
}
},
{
"ex_id": "aqua-test-224",
"gold": "D",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -3.328125,
"scores": {
"A": -9.65625,
"B": -10.03125,
"C": -10.046875,
"D": -12.984375,
"E": -13.359375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -7.765625,
"scores": {
"A": -6.25,
"B": -9.96875,
"C": -11.75,
"D": -14.015625,
"E": -14.109375
}
}
},
{
"ex_id": "aqua-test-225",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.9609375,
"scores": {
"A": -9.90625,
"B": -7.4609375,
"C": -8.71875,
"D": -10.421875,
"E": -10.09375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.33984375,
"scores": {
"A": -7.42578125,
"B": -8.734375,
"C": -9.703125,
"D": -8.765625,
"E": -10.296875
}
}
},
{
"ex_id": "aqua-test-226",
"gold": "E",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -0.8046875,
"scores": {
"A": -9.1015625,
"B": -8.953125,
"C": -8.7265625,
"D": -10.828125,
"E": -9.53125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.74609375,
"scores": {
"A": -5.49609375,
"B": -8.390625,
"C": -9.40625,
"D": -8.8828125,
"E": -8.2421875
}
}
},
{
"ex_id": "aqua-test-227",
"gold": "B",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.2109375,
"scores": {
"A": -14.609375,
"B": -13.5625,
"C": -11.3515625,
"D": -11.515625,
"E": -11.7890625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.7578125,
"scores": {
"A": -8.953125,
"B": -13.7109375,
"C": -14.03125,
"D": -10.578125,
"E": -14.6484375
}
}
},
{
"ex_id": "aqua-test-228",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.078125,
"scores": {
"A": -13.3125,
"B": -10.71875,
"C": -10.796875,
"D": -14.0859375,
"E": -14.8984375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.28125,
"scores": {
"A": -7.171875,
"B": -8.453125,
"C": -9.515625,
"D": -10.125,
"E": -8.953125
}
}
},
{
"ex_id": "aqua-test-229",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -4.25,
"scores": {
"A": -13.28125,
"B": -9.953125,
"C": -14.203125,
"D": -13.296875,
"E": -10.6171875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.5546875,
"scores": {
"A": -7.2265625,
"B": -8.6875,
"C": -11.78125,
"D": -8.859375,
"E": -8.71875
}
}
},
{
"ex_id": "aqua-test-230",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.390625,
"scores": {
"A": -9.8125,
"B": -9.7109375,
"C": -10.1015625,
"D": -11.03125,
"E": -11.2265625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.8515625,
"scores": {
"A": -7.59375,
"B": -10.15625,
"C": -10.4453125,
"D": -8.6953125,
"E": -9.484375
}
}
},
{
"ex_id": "aqua-test-231",
"gold": "E",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -1.5703125,
"scores": {
"A": -14.2109375,
"B": -11.6015625,
"C": -10.7421875,
"D": -14.015625,
"E": -12.3125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.26171875,
"scores": {
"A": -6.23828125,
"B": -7.73046875,
"C": -7.69921875,
"D": -8.921875,
"E": -8.5
}
}
},
{
"ex_id": "aqua-test-232",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.390625,
"scores": {
"A": -10.4296875,
"B": -8.25,
"C": -10.5078125,
"D": -10.8671875,
"E": -11.640625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.82421875,
"scores": {
"A": -6.18359375,
"B": -8.03125,
"C": -9.046875,
"D": -8.5078125,
"E": -11.0078125
}
}
},
{
"ex_id": "aqua-test-233",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.7734375,
"scores": {
"A": -14.265625,
"B": -11.4921875,
"C": -12.5390625,
"D": -12.203125,
"E": -13.578125
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 0.4296875,
"scores": {
"A": -9.125,
"B": -9.75,
"C": -10.7734375,
"D": -9.5546875,
"E": -11.28125
}
}
},
{
"ex_id": "aqua-test-234",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -1.75,
"scores": {
"A": -11.015625,
"B": -9.265625,
"C": -11.203125,
"D": -11.375,
"E": -11.359375
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 3.2421875,
"scores": {
"A": -5.5859375,
"B": -8.828125,
"C": -12.875,
"D": -9.28125,
"E": -11.328125
}
}
},
{
"ex_id": "aqua-test-235",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -2.0625,
"scores": {
"A": -10.9921875,
"B": -8.9296875,
"C": -10.15625,
"D": -11.8828125,
"E": -10.40625
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 1.46875,
"scores": {
"A": -7.265625,
"B": -8.734375,
"C": -9.3515625,
"D": -10.6875,
"E": -9.828125
}
}
},
{
"ex_id": "aqua-test-236",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.4375,
"scores": {
"A": -10.625,
"B": -10.1875,
"C": -10.4375,
"D": -13.5,
"E": -11.234375
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 2.484375,
"scores": {
"A": -7.21875,
"B": -9.703125,
"C": -9.8125,
"D": -12.1015625,
"E": -12.265625
}
}
},
{
"ex_id": "aqua-test-237",
"gold": "D",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -2.578125,
"scores": {
"A": -14.484375,
"B": -14.9453125,
"C": -18.734375,
"D": -17.0625,
"E": -17.859375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.2421875,
"scores": {
"A": -8.6171875,
"B": -12.8515625,
"C": -15.703125,
"D": -11.859375,
"E": -14.546875
}
}
},
{
"ex_id": "aqua-test-238",
"gold": "B",
"baseline": {
"pred_label": "D",
"correct": false,
"margin": -0.734375,
"scores": {
"A": -12.953125,
"B": -12.265625,
"C": -12.90625,
"D": -11.53125,
"E": -13.203125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.7265625,
"scores": {
"A": -8.828125,
"B": -11.5546875,
"C": -12.4140625,
"D": -10.3671875,
"E": -12.140625
}
}
},
{
"ex_id": "aqua-test-239",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -1.515625,
"scores": {
"A": -10.2265625,
"B": -9.2734375,
"C": -8.7109375,
"D": -12.0625,
"E": -11.703125
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 3.328125,
"scores": {
"A": -6.8515625,
"B": -10.1796875,
"C": -11.1796875,
"D": -13.3125,
"E": -12.7421875
}
}
},
{
"ex_id": "aqua-test-240",
"gold": "E",
"baseline": {
"pred_label": "D",
"correct": false,
"margin": -0.4453125,
"scores": {
"A": -15.140625,
"B": -14.28125,
"C": -18.34375,
"D": -14.2265625,
"E": -14.671875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.265625,
"scores": {
"A": -9.40625,
"B": -11.6484375,
"C": -14.1875,
"D": -11.1015625,
"E": -12.671875
}
}
},
{
"ex_id": "aqua-test-241",
"gold": "D",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -4.3671875,
"scores": {
"A": -10.8203125,
"B": -8.8828125,
"C": -10.421875,
"D": -13.25,
"E": -12.390625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.8359375,
"scores": {
"A": -8.546875,
"B": -12.5546875,
"C": -12.1875,
"D": -12.3828125,
"E": -11.40625
}
}
},
{
"ex_id": "aqua-test-242",
"gold": "C",
"baseline": {
"pred_label": "A",
"correct": false,
"margin": -2.8828125,
"scores": {
"A": -9.3046875,
"B": -10.8671875,
"C": -12.1875,
"D": -12.9375,
"E": -11.6875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -7.625,
"scores": {
"A": -6.5625,
"B": -11.6875,
"C": -14.1875,
"D": -14.421875,
"E": -14.375
}
}
},
{
"ex_id": "aqua-test-243",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -6.3671875,
"scores": {
"A": -15.640625,
"B": -9.2734375,
"C": -11.265625,
"D": -13.8515625,
"E": -12.7578125
}
},
"ablated_1": {
"pred_label": "B",
"correct": false,
"margin": -1.5078125,
"scores": {
"A": -8.6640625,
"B": -7.15625,
"C": -9.625,
"D": -11.8359375,
"E": -12.53125
}
}
},
{
"ex_id": "aqua-test-244",
"gold": "A",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -2.359375,
"scores": {
"A": -13.7421875,
"B": -12.625,
"C": -11.3828125,
"D": -12.9453125,
"E": -12.703125
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 3.046875,
"scores": {
"A": -6.3125,
"B": -9.359375,
"C": -10.09375,
"D": -10.8984375,
"E": -12.4375
}
}
},
{
"ex_id": "aqua-test-245",
"gold": "C",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -0.359375,
"scores": {
"A": -11.796875,
"B": -11.140625,
"C": -11.5,
"D": -13.34375,
"E": -13.1171875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.48046875,
"scores": {
"A": -7.41015625,
"B": -13.3671875,
"C": -13.890625,
"D": -14.0625,
"E": -15.78125
}
}
},
{
"ex_id": "aqua-test-246",
"gold": "B",
"baseline": {
"pred_label": "E",
"correct": false,
"margin": -1.203125,
"scores": {
"A": -13.828125,
"B": -14.8046875,
"C": -15.125,
"D": -14.2890625,
"E": -13.6015625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.328125,
"scores": {
"A": -9.640625,
"B": -13.96875,
"C": -16.53125,
"D": -12.234375,
"E": -13.359375
}
}
},
{
"ex_id": "aqua-test-247",
"gold": "A",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -4.484375,
"scores": {
"A": -15.078125,
"B": -10.59375,
"C": -13.671875,
"D": -13.515625,
"E": -13.796875
}
},
"ablated_1": {
"pred_label": "A",
"correct": true,
"margin": 0.890625,
"scores": {
"A": -9.8046875,
"B": -11.3828125,
"C": -12.125,
"D": -10.6953125,
"E": -12.6640625
}
}
},
{
"ex_id": "aqua-test-248",
"gold": "D",
"baseline": {
"pred_label": "E",
"correct": false,
"margin": -3.4921875,
"scores": {
"A": -14.015625,
"B": -13.1328125,
"C": -13.9140625,
"D": -15.734375,
"E": -12.2421875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.1875,
"scores": {
"A": -8.6875,
"B": -9.5703125,
"C": -11.8046875,
"D": -11.875,
"E": -9.7734375
}
}
},
{
"ex_id": "aqua-test-249",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.828125,
"scores": {
"A": -10.328125,
"B": -8.859375,
"C": -8.03125,
"D": -11.640625,
"E": -11.109375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.59375,
"scores": {
"A": -5.9921875,
"B": -7.609375,
"C": -8.5859375,
"D": -8.4375,
"E": -8.6796875
}
}
},
{
"ex_id": "aqua-test-250",
"gold": "E",
"baseline": {
"pred_label": "C",
"correct": false,
"margin": -1.984375,
"scores": {
"A": -10.4921875,
"B": -11.6015625,
"C": -9.9921875,
"D": -14.578125,
"E": -11.9765625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -7.6015625,
"scores": {
"A": -8.0546875,
"B": -11.5625,
"C": -12.609375,
"D": -14.765625,
"E": -15.65625
}
}
},
{
"ex_id": "aqua-test-251",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.078125,
"scores": {
"A": -9.90625,
"B": -9.828125,
"C": -12.40625,
"D": -11.265625,
"E": -10.703125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.984375,
"scores": {
"A": -9.296875,
"B": -12.28125,
"C": -13.5,
"D": -12.09375,
"E": -11.140625
}
}
},
{
"ex_id": "aqua-test-252",
"gold": "C",
"baseline": {
"pred_label": "E",
"correct": false,
"margin": -0.1953125,
"scores": {
"A": -10.3671875,
"B": -9.515625,
"C": -9.65625,
"D": -10.5859375,
"E": -9.4609375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.25390625,
"scores": {
"A": -6.80859375,
"B": -8.46875,
"C": -9.0625,
"D": -9.125,
"E": -8.734375
}
}
},
{
"ex_id": "aqua-test-253",
"gold": "E",
"baseline": {
"pred_label": "B",
"correct": false,
"margin": -3.15625,
"scores": {
"A": -12.359375,
"B": -10.0625,
"C": -10.921875,
"D": -12.96875,
"E": -13.21875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -7.12890625,
"scores": {
"A": -4.81640625,
"B": -6.953125,
"C": -8.9140625,
"D": -9.578125,
"E": -11.9453125
}
}
}
],
"alpha_sweep_summary_on_flipset": {
"0.0": {
"n": 42,
"flip_rate": 0.0,
"ablated_acc": 1.0,
"pred_change_rate": 0.0,
"mean_margin": 0.9223400354385376,
"median_margin": 0.671875,
"mean_delta_margin_vs_baseline": 0.0,
"median_delta_margin_vs_baseline": 0.0
},
"0.05": {
"n": 42,
"flip_rate": 0.047619047619047616,
"ablated_acc": 0.9523809523809523,
"pred_change_rate": 0.047619047619047616,
"mean_margin": 0.918154776096344,
"median_margin": 0.69140625,
"mean_delta_margin_vs_baseline": -0.004185267724096775,
"median_delta_margin_vs_baseline": 0.0
},
"0.1": {
"n": 42,
"flip_rate": 0.09523809523809523,
"ablated_acc": 0.9047619047619048,
"pred_change_rate": 0.09523809523809523,
"mean_margin": 0.9194568395614624,
"median_margin": 0.66796875,
"mean_delta_margin_vs_baseline": -0.0028831844683736563,
"median_delta_margin_vs_baseline": 0.0
},
"0.2": {
"n": 42,
"flip_rate": 0.09523809523809523,
"ablated_acc": 0.9047619047619048,
"pred_change_rate": 0.09523809523809523,
"mean_margin": 0.919549822807312,
"median_margin": 0.64453125,
"mean_delta_margin_vs_baseline": -0.0027901786379516125,
"median_delta_margin_vs_baseline": -0.0078125
},
"0.5": {
"n": 42,
"flip_rate": 0.2857142857142857,
"ablated_acc": 0.7142857142857143,
"pred_change_rate": 0.2857142857142857,
"mean_margin": 0.603143572807312,
"median_margin": 0.37109375,
"mean_delta_margin_vs_baseline": -0.3191964328289032,
"median_delta_margin_vs_baseline": -0.34765625
},
"1.0": {
"n": 42,
"flip_rate": 1.0,
"ablated_acc": 0.0,
"pred_change_rate": 1.0,
"mean_margin": -2.7706472873687744,
"median_margin": -2.6171875,
"mean_delta_margin_vs_baseline": -3.6929874420166016,
"median_delta_margin_vs_baseline": -3.314453125
}
},
"alpha_sweep_rows_by_alpha": {
"0.0": [
{
"ex_id": "aqua-test-2",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.2578125,
"scores": {
"A": -11.234375,
"B": -10.2109375,
"C": -13.171875,
"D": -12.4453125,
"E": -10.46875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.2578125,
"scores": {
"A": -11.234375,
"B": -10.2109375,
"C": -13.171875,
"D": -12.4453125,
"E": -10.46875
}
},
"flip": false
},
{
"ex_id": "aqua-test-5",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.953125,
"scores": {
"A": -11.9921875,
"B": -10.9765625,
"C": -12.0390625,
"D": -11.9609375,
"E": -11.9296875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.953125,
"scores": {
"A": -11.9921875,
"B": -10.9765625,
"C": -12.0390625,
"D": -11.9609375,
"E": -11.9296875
}
},
"flip": false
},
{
"ex_id": "aqua-test-9",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.078125,
"scores": {
"A": -11.265625,
"B": -8.890625,
"C": -9.96875,
"D": -12.359375,
"E": -13.9921875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.078125,
"scores": {
"A": -11.265625,
"B": -8.890625,
"C": -9.96875,
"D": -12.359375,
"E": -13.9921875
}
},
"flip": false
},
{
"ex_id": "aqua-test-15",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.671875,
"scores": {
"A": -11.078125,
"B": -10.40625,
"C": -13.625,
"D": -15.3125,
"E": -13.8125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.671875,
"scores": {
"A": -11.078125,
"B": -10.40625,
"C": -13.625,
"D": -15.3125,
"E": -13.8125
}
},
"flip": false
},
{
"ex_id": "aqua-test-16",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 2.80078125,
"scores": {
"A": -12.484375,
"B": -10.515625,
"C": -7.71484375,
"D": -12.859375,
"E": -12.8125
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 2.80078125,
"scores": {
"A": -12.484375,
"B": -10.515625,
"C": -7.71484375,
"D": -12.859375,
"E": -12.8125
}
},
"flip": false
},
{
"ex_id": "aqua-test-21",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.765625,
"scores": {
"A": -10.4140625,
"B": -9.6484375,
"C": -12.5546875,
"D": -12.234375,
"E": -11.3828125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.765625,
"scores": {
"A": -10.4140625,
"B": -9.6484375,
"C": -12.5546875,
"D": -12.234375,
"E": -11.3828125
}
},
"flip": false
},
{
"ex_id": "aqua-test-25",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.0546875,
"scores": {
"A": -12.953125,
"B": -12.2578125,
"C": -12.203125,
"D": -12.4140625,
"E": -13.6328125
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.0546875,
"scores": {
"A": -12.953125,
"B": -12.2578125,
"C": -12.203125,
"D": -12.4140625,
"E": -13.6328125
}
},
"flip": false
},
{
"ex_id": "aqua-test-33",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 1.1875,
"scores": {
"A": -17.28125,
"B": -18.1875,
"C": -16.09375,
"D": -19.15625,
"E": -19.46875
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 1.1875,
"scores": {
"A": -17.28125,
"B": -18.1875,
"C": -16.09375,
"D": -19.15625,
"E": -19.46875
}
},
"flip": false
},
{
"ex_id": "aqua-test-39",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 1.71875,
"scores": {
"A": -10.2265625,
"B": -11.9453125,
"C": -12.1484375,
"D": -14.3125,
"E": -14.015625
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 1.71875,
"scores": {
"A": -10.2265625,
"B": -11.9453125,
"C": -12.1484375,
"D": -14.3125,
"E": -14.015625
}
},
"flip": false
},
{
"ex_id": "aqua-test-47",
"gold": "E",
"baseline": {
"pred_label": "E",
"correct": true,
"margin": 0.203125,
"scores": {
"A": -11.9453125,
"B": -12.5,
"C": -12.1171875,
"D": -13.046875,
"E": -11.7421875
}
},
"ablated": {
"pred_label": "E",
"correct": true,
"margin": 0.203125,
"scores": {
"A": -11.9453125,
"B": -12.5,
"C": -12.1171875,
"D": -13.046875,
"E": -11.7421875
}
},
"flip": false
},
{
"ex_id": "aqua-test-52",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.0625,
"scores": {
"A": -12.890625,
"B": -9.8515625,
"C": -9.9140625,
"D": -11.515625,
"E": -10.6875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.0625,
"scores": {
"A": -12.890625,
"B": -9.8515625,
"C": -9.9140625,
"D": -11.515625,
"E": -10.6875
}
},
"flip": false
},
{
"ex_id": "aqua-test-57",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.15625,
"scores": {
"A": -13.875,
"B": -12.96875,
"C": -14.359375,
"D": -14.140625,
"E": -13.125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.15625,
"scores": {
"A": -13.875,
"B": -12.96875,
"C": -14.359375,
"D": -14.140625,
"E": -13.125
}
},
"flip": false
},
{
"ex_id": "aqua-test-68",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.6875,
"scores": {
"A": -11.65625,
"B": -10.96875,
"C": -11.875,
"D": -12.078125,
"E": -12.640625
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.6875,
"scores": {
"A": -11.65625,
"B": -10.96875,
"C": -11.875,
"D": -12.078125,
"E": -12.640625
}
},
"flip": false
},
{
"ex_id": "aqua-test-78",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 3.078125,
"scores": {
"A": -12.7890625,
"B": -8.3203125,
"C": -11.3984375,
"D": -13.765625,
"E": -13.84375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 3.078125,
"scores": {
"A": -12.7890625,
"B": -8.3203125,
"C": -11.3984375,
"D": -13.765625,
"E": -13.84375
}
},
"flip": false
},
{
"ex_id": "aqua-test-87",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.625,
"scores": {
"A": -9.7890625,
"B": -9.1640625,
"C": -11.234375,
"D": -12.0,
"E": -11.46875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.625,
"scores": {
"A": -9.7890625,
"B": -9.1640625,
"C": -11.234375,
"D": -12.0,
"E": -11.46875
}
},
"flip": false
},
{
"ex_id": "aqua-test-100",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.1484375,
"scores": {
"A": -9.265625,
"B": -9.7265625,
"C": -9.1171875,
"D": -10.0546875,
"E": -10.6015625
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.1484375,
"scores": {
"A": -9.265625,
"B": -9.7265625,
"C": -9.1171875,
"D": -10.0546875,
"E": -10.6015625
}
},
"flip": false
},
{
"ex_id": "aqua-test-103",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.1484375,
"scores": {
"A": -9.734375,
"B": -8.5234375,
"C": -9.6875,
"D": -11.4375,
"E": -9.671875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.1484375,
"scores": {
"A": -9.734375,
"B": -8.5234375,
"C": -9.6875,
"D": -11.4375,
"E": -9.671875
}
},
"flip": false
},
{
"ex_id": "aqua-test-105",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 3.03125,
"scores": {
"A": -11.5,
"B": -12.0234375,
"C": -8.46875,
"D": -13.9765625,
"E": -13.28125
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 3.03125,
"scores": {
"A": -11.5,
"B": -12.0234375,
"C": -8.46875,
"D": -13.9765625,
"E": -13.28125
}
},
"flip": false
},
{
"ex_id": "aqua-test-111",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.203125,
"scores": {
"A": -9.796875,
"B": -9.2734375,
"C": -9.4765625,
"D": -10.7578125,
"E": -11.4296875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.203125,
"scores": {
"A": -9.796875,
"B": -9.2734375,
"C": -9.4765625,
"D": -10.7578125,
"E": -11.4296875
}
},
"flip": false
},
{
"ex_id": "aqua-test-116",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.7109375,
"scores": {
"A": -12.0390625,
"B": -9.7421875,
"C": -11.453125,
"D": -11.5390625,
"E": -11.8203125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.7109375,
"scores": {
"A": -12.0390625,
"B": -9.7421875,
"C": -11.453125,
"D": -11.5390625,
"E": -11.8203125
}
},
"flip": false
},
{
"ex_id": "aqua-test-120",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.3515625,
"scores": {
"A": -12.625,
"B": -10.171875,
"C": -10.5234375,
"D": -11.96875,
"E": -12.625
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.3515625,
"scores": {
"A": -12.625,
"B": -10.171875,
"C": -10.5234375,
"D": -11.96875,
"E": -12.625
}
},
"flip": false
},
{
"ex_id": "aqua-test-122",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.671875,
"scores": {
"A": -11.09375,
"B": -10.421875,
"C": -13.25,
"D": -13.296875,
"E": -13.5
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.671875,
"scores": {
"A": -11.09375,
"B": -10.421875,
"C": -13.25,
"D": -13.296875,
"E": -13.5
}
},
"flip": false
},
{
"ex_id": "aqua-test-123",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 1.9296875,
"scores": {
"A": -12.8125,
"B": -13.265625,
"C": -10.09375,
"D": -12.0234375,
"E": -12.84375
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 1.9296875,
"scores": {
"A": -12.8125,
"B": -13.265625,
"C": -10.09375,
"D": -12.0234375,
"E": -12.84375
}
},
"flip": false
},
{
"ex_id": "aqua-test-125",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.25,
"scores": {
"A": -12.8984375,
"B": -12.015625,
"C": -10.3671875,
"D": -10.6171875,
"E": -11.515625
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.25,
"scores": {
"A": -12.8984375,
"B": -12.015625,
"C": -10.3671875,
"D": -10.6171875,
"E": -11.515625
}
},
"flip": false
},
{
"ex_id": "aqua-test-130",
"gold": "D",
"baseline": {
"pred_label": "D",
"correct": true,
"margin": 0.5625,
"scores": {
"A": -12.53125,
"B": -11.625,
"C": -14.40625,
"D": -11.0625,
"E": -12.203125
}
},
"ablated": {
"pred_label": "D",
"correct": true,
"margin": 0.5625,
"scores": {
"A": -12.53125,
"B": -11.625,
"C": -14.40625,
"D": -11.0625,
"E": -12.203125
}
},
"flip": false
},
{
"ex_id": "aqua-test-140",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.09375,
"scores": {
"A": -12.171875,
"B": -10.953125,
"C": -12.484375,
"D": -12.046875,
"E": -12.828125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.09375,
"scores": {
"A": -12.171875,
"B": -10.953125,
"C": -12.484375,
"D": -12.046875,
"E": -12.828125
}
},
"flip": false
},
{
"ex_id": "aqua-test-141",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.390625,
"scores": {
"A": -15.65625,
"B": -14.0,
"C": -12.3359375,
"D": -12.7265625,
"E": -13.421875
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.390625,
"scores": {
"A": -15.65625,
"B": -14.0,
"C": -12.3359375,
"D": -12.7265625,
"E": -13.421875
}
},
"flip": false
},
{
"ex_id": "aqua-test-148",
"gold": "D",
"baseline": {
"pred_label": "D",
"correct": true,
"margin": 0.09375,
"scores": {
"A": -11.84375,
"B": -8.6875,
"C": -9.390625,
"D": -8.59375,
"E": -10.328125
}
},
"ablated": {
"pred_label": "D",
"correct": true,
"margin": 0.09375,
"scores": {
"A": -11.84375,
"B": -8.6875,
"C": -9.390625,
"D": -8.59375,
"E": -10.328125
}
},
"flip": false
},
{
"ex_id": "aqua-test-152",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.65625,
"scores": {
"A": -12.15625,
"B": -11.09375,
"C": -11.75,
"D": -11.765625,
"E": -11.75
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.65625,
"scores": {
"A": -12.15625,
"B": -11.09375,
"C": -11.75,
"D": -11.765625,
"E": -11.75
}
},
"flip": false
},
{
"ex_id": "aqua-test-167",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 2.28125,
"scores": {
"A": -13.3125,
"B": -10.640625,
"C": -12.921875,
"D": -16.09375,
"E": -14.75
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 2.28125,
"scores": {
"A": -13.3125,
"B": -10.640625,
"C": -12.921875,
"D": -16.09375,
"E": -14.75
}
},
"flip": false
},
{
"ex_id": "aqua-test-178",
"gold": "E",
"baseline": {
"pred_label": "E",
"correct": true,
"margin": 0.4609375,
"scores": {
"A": -13.1953125,
"B": -11.1015625,
"C": -12.8203125,
"D": -12.625,
"E": -10.640625
}
},
"ablated": {
"pred_label": "E",
"correct": true,
"margin": 0.4609375,
"scores": {
"A": -13.1953125,
"B": -11.1015625,
"C": -12.8203125,
"D": -12.625,
"E": -10.640625
}
},
"flip": false
},
{
"ex_id": "aqua-test-181",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.7734375,
"scores": {
"A": -9.6328125,
"B": -8.859375,
"C": -11.828125,
"D": -11.640625,
"E": -10.6875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.7734375,
"scores": {
"A": -9.6328125,
"B": -8.859375,
"C": -11.828125,
"D": -11.640625,
"E": -10.6875
}
},
"flip": false
},
{
"ex_id": "aqua-test-183",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.40625,
"scores": {
"A": -12.6484375,
"B": -9.3671875,
"C": -10.7734375,
"D": -13.140625,
"E": -13.125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.40625,
"scores": {
"A": -12.6484375,
"B": -9.3671875,
"C": -10.7734375,
"D": -13.140625,
"E": -13.125
}
},
"flip": false
},
{
"ex_id": "aqua-test-189",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.2109375,
"scores": {
"A": -14.5390625,
"B": -11.546875,
"C": -11.8046875,
"D": -11.7578125,
"E": -13.34375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.2109375,
"scores": {
"A": -14.5390625,
"B": -11.546875,
"C": -11.8046875,
"D": -11.7578125,
"E": -13.34375
}
},
"flip": false
},
{
"ex_id": "aqua-test-190",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.34375,
"scores": {
"A": -13.6015625,
"B": -10.7734375,
"C": -10.4296875,
"D": -13.3671875,
"E": -13.9296875
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.34375,
"scores": {
"A": -13.6015625,
"B": -10.7734375,
"C": -10.4296875,
"D": -13.3671875,
"E": -13.9296875
}
},
"flip": false
},
{
"ex_id": "aqua-test-191",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.953125,
"scores": {
"A": -11.890625,
"B": -10.9375,
"C": -13.640625,
"D": -14.109375,
"E": -13.765625
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.953125,
"scores": {
"A": -11.890625,
"B": -10.9375,
"C": -13.640625,
"D": -14.109375,
"E": -13.765625
}
},
"flip": false
},
{
"ex_id": "aqua-test-206",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.1796875,
"scores": {
"A": -11.6171875,
"B": -10.96875,
"C": -11.1484375,
"D": -12.84375,
"E": -14.0
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.1796875,
"scores": {
"A": -11.6171875,
"B": -10.96875,
"C": -11.1484375,
"D": -12.84375,
"E": -14.0
}
},
"flip": false
},
{
"ex_id": "aqua-test-212",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.5078125,
"scores": {
"A": -11.59375,
"B": -9.609375,
"C": -11.1171875,
"D": -11.7421875,
"E": -12.8359375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.5078125,
"scores": {
"A": -11.59375,
"B": -9.609375,
"C": -11.1171875,
"D": -11.7421875,
"E": -12.8359375
}
},
"flip": false
},
{
"ex_id": "aqua-test-223",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 3.09375,
"scores": {
"A": -10.265625,
"B": -7.1484375,
"C": -10.2421875,
"D": -10.921875,
"E": -11.3359375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 3.09375,
"scores": {
"A": -10.265625,
"B": -7.1484375,
"C": -10.2421875,
"D": -10.921875,
"E": -11.3359375
}
},
"flip": false
},
{
"ex_id": "aqua-test-228",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.078125,
"scores": {
"A": -13.3125,
"B": -10.71875,
"C": -10.796875,
"D": -14.0859375,
"E": -14.8984375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.078125,
"scores": {
"A": -13.3125,
"B": -10.71875,
"C": -10.796875,
"D": -14.0859375,
"E": -14.8984375
}
},
"flip": false
},
{
"ex_id": "aqua-test-249",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.828125,
"scores": {
"A": -10.328125,
"B": -8.859375,
"C": -8.03125,
"D": -11.640625,
"E": -11.109375
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.828125,
"scores": {
"A": -10.328125,
"B": -8.859375,
"C": -8.03125,
"D": -11.640625,
"E": -11.109375
}
},
"flip": false
},
{
"ex_id": "aqua-test-251",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.078125,
"scores": {
"A": -9.90625,
"B": -9.828125,
"C": -12.40625,
"D": -11.265625,
"E": -10.703125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.078125,
"scores": {
"A": -9.90625,
"B": -9.828125,
"C": -12.40625,
"D": -11.265625,
"E": -10.703125
}
},
"flip": false
}
],
"0.05": [
{
"ex_id": "aqua-test-2",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.2578125,
"scores": {
"A": -11.234375,
"B": -10.2109375,
"C": -13.171875,
"D": -12.4453125,
"E": -10.46875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.328125,
"scores": {
"A": -11.1015625,
"B": -10.0703125,
"C": -13.03125,
"D": -12.34375,
"E": -10.3984375
}
},
"flip": false
},
{
"ex_id": "aqua-test-5",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.953125,
"scores": {
"A": -11.9921875,
"B": -10.9765625,
"C": -12.0390625,
"D": -11.9609375,
"E": -11.9296875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.953125,
"scores": {
"A": -12.0,
"B": -10.96875,
"C": -12.109375,
"D": -11.953125,
"E": -11.921875
}
},
"flip": false
},
{
"ex_id": "aqua-test-9",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.078125,
"scores": {
"A": -11.265625,
"B": -8.890625,
"C": -9.96875,
"D": -12.359375,
"E": -13.9921875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.078125,
"scores": {
"A": -11.2421875,
"B": -8.8671875,
"C": -9.9453125,
"D": -12.3359375,
"E": -14.078125
}
},
"flip": false
},
{
"ex_id": "aqua-test-15",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.671875,
"scores": {
"A": -11.078125,
"B": -10.40625,
"C": -13.625,
"D": -15.3125,
"E": -13.8125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.734375,
"scores": {
"A": -11.03125,
"B": -10.296875,
"C": -13.5546875,
"D": -15.3359375,
"E": -13.8515625
}
},
"flip": false
},
{
"ex_id": "aqua-test-16",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 2.80078125,
"scores": {
"A": -12.484375,
"B": -10.515625,
"C": -7.71484375,
"D": -12.859375,
"E": -12.8125
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 2.8671875,
"scores": {
"A": -12.484375,
"B": -10.515625,
"C": -7.6484375,
"D": -12.84375,
"E": -12.84375
}
},
"flip": false
},
{
"ex_id": "aqua-test-21",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.765625,
"scores": {
"A": -10.4140625,
"B": -9.6484375,
"C": -12.5546875,
"D": -12.234375,
"E": -11.3828125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.875,
"scores": {
"A": -10.3828125,
"B": -9.5078125,
"C": -12.5,
"D": -12.203125,
"E": -11.359375
}
},
"flip": false
},
{
"ex_id": "aqua-test-25",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.0546875,
"scores": {
"A": -12.953125,
"B": -12.2578125,
"C": -12.203125,
"D": -12.4140625,
"E": -13.6328125
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.0078125,
"scores": {
"A": -12.8828125,
"B": -12.09375,
"C": -12.0859375,
"D": -12.3125,
"E": -13.4921875
}
},
"flip": false
},
{
"ex_id": "aqua-test-33",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 1.1875,
"scores": {
"A": -17.28125,
"B": -18.1875,
"C": -16.09375,
"D": -19.15625,
"E": -19.46875
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 1.15625,
"scores": {
"A": -17.0625,
"B": -17.921875,
"C": -15.90625,
"D": -18.84375,
"E": -19.140625
}
},
"flip": false
},
{
"ex_id": "aqua-test-39",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 1.71875,
"scores": {
"A": -10.2265625,
"B": -11.9453125,
"C": -12.1484375,
"D": -14.3125,
"E": -14.015625
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 1.6796875,
"scores": {
"A": -10.125,
"B": -11.8046875,
"C": -12.0859375,
"D": -14.1171875,
"E": -13.7890625
}
},
"flip": false
},
{
"ex_id": "aqua-test-47",
"gold": "E",
"baseline": {
"pred_label": "E",
"correct": true,
"margin": 0.203125,
"scores": {
"A": -11.9453125,
"B": -12.5,
"C": -12.1171875,
"D": -13.046875,
"E": -11.7421875
}
},
"ablated": {
"pred_label": "E",
"correct": true,
"margin": 0.21875,
"scores": {
"A": -12.0234375,
"B": -12.5625,
"C": -12.1640625,
"D": -13.09375,
"E": -11.8046875
}
},
"flip": false
},
{
"ex_id": "aqua-test-52",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.0625,
"scores": {
"A": -12.890625,
"B": -9.8515625,
"C": -9.9140625,
"D": -11.515625,
"E": -10.6875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.1015625,
"scores": {
"A": -12.828125,
"B": -9.765625,
"C": -9.8671875,
"D": -11.4921875,
"E": -10.640625
}
},
"flip": false
},
{
"ex_id": "aqua-test-57",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.15625,
"scores": {
"A": -13.875,
"B": -12.96875,
"C": -14.359375,
"D": -14.140625,
"E": -13.125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.1484375,
"scores": {
"A": -13.8515625,
"B": -12.9296875,
"C": -14.296875,
"D": -14.078125,
"E": -13.078125
}
},
"flip": false
},
{
"ex_id": "aqua-test-68",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.6875,
"scores": {
"A": -11.65625,
"B": -10.96875,
"C": -11.875,
"D": -12.078125,
"E": -12.640625
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.6640625,
"scores": {
"A": -11.546875,
"B": -10.8828125,
"C": -11.796875,
"D": -12.046875,
"E": -12.515625
}
},
"flip": false
},
{
"ex_id": "aqua-test-78",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 3.078125,
"scores": {
"A": -12.7890625,
"B": -8.3203125,
"C": -11.3984375,
"D": -13.765625,
"E": -13.84375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 3.078125,
"scores": {
"A": -12.734375,
"B": -8.2890625,
"C": -11.3671875,
"D": -13.6875,
"E": -13.8515625
}
},
"flip": false
},
{
"ex_id": "aqua-test-87",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.625,
"scores": {
"A": -9.7890625,
"B": -9.1640625,
"C": -11.234375,
"D": -12.0,
"E": -11.46875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.65625,
"scores": {
"A": -9.71875,
"B": -9.0625,
"C": -11.1875,
"D": -11.9296875,
"E": -11.390625
}
},
"flip": false
},
{
"ex_id": "aqua-test-100",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.1484375,
"scores": {
"A": -9.265625,
"B": -9.7265625,
"C": -9.1171875,
"D": -10.0546875,
"E": -10.6015625
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.140625,
"scores": {
"A": -9.2109375,
"B": -9.6875,
"C": -9.0703125,
"D": -10.0,
"E": -10.609375
}
},
"flip": false
},
{
"ex_id": "aqua-test-103",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.1484375,
"scores": {
"A": -9.734375,
"B": -8.5234375,
"C": -9.6875,
"D": -11.4375,
"E": -9.671875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.125,
"scores": {
"A": -9.78125,
"B": -8.578125,
"C": -9.75,
"D": -11.46875,
"E": -9.703125
}
},
"flip": false
},
{
"ex_id": "aqua-test-105",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 3.03125,
"scores": {
"A": -11.5,
"B": -12.0234375,
"C": -8.46875,
"D": -13.9765625,
"E": -13.28125
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 3.1484375,
"scores": {
"A": -11.5390625,
"B": -12.0078125,
"C": -8.390625,
"D": -13.984375,
"E": -13.3125
}
},
"flip": false
},
{
"ex_id": "aqua-test-111",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.203125,
"scores": {
"A": -9.796875,
"B": -9.2734375,
"C": -9.4765625,
"D": -10.7578125,
"E": -11.4296875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.203125,
"scores": {
"A": -9.796875,
"B": -9.265625,
"C": -9.46875,
"D": -10.796875,
"E": -11.421875
}
},
"flip": false
},
{
"ex_id": "aqua-test-116",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.7109375,
"scores": {
"A": -12.0390625,
"B": -9.7421875,
"C": -11.453125,
"D": -11.5390625,
"E": -11.8203125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.78125,
"scores": {
"A": -11.9375,
"B": -9.59375,
"C": -11.375,
"D": -11.421875,
"E": -11.75
}
},
"flip": false
},
{
"ex_id": "aqua-test-120",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.3515625,
"scores": {
"A": -12.625,
"B": -10.171875,
"C": -10.5234375,
"D": -11.96875,
"E": -12.625
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.28125,
"scores": {
"A": -12.5625,
"B": -10.0859375,
"C": -10.3671875,
"D": -11.875,
"E": -12.546875
}
},
"flip": false
},
{
"ex_id": "aqua-test-122",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.671875,
"scores": {
"A": -11.09375,
"B": -10.421875,
"C": -13.25,
"D": -13.296875,
"E": -13.5
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.71875,
"scores": {
"A": -11.140625,
"B": -10.421875,
"C": -13.28125,
"D": -13.3125,
"E": -13.484375
}
},
"flip": false
},
{
"ex_id": "aqua-test-123",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 1.9296875,
"scores": {
"A": -12.8125,
"B": -13.265625,
"C": -10.09375,
"D": -12.0234375,
"E": -12.84375
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 1.9921875,
"scores": {
"A": -12.84375,
"B": -13.3671875,
"C": -10.0703125,
"D": -12.0625,
"E": -12.8359375
}
},
"flip": false
},
{
"ex_id": "aqua-test-125",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.25,
"scores": {
"A": -12.8984375,
"B": -12.015625,
"C": -10.3671875,
"D": -10.6171875,
"E": -11.515625
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.09375,
"scores": {
"A": -12.96875,
"B": -12.0703125,
"C": -10.5859375,
"D": -10.6796875,
"E": -11.546875
}
},
"flip": false
},
{
"ex_id": "aqua-test-130",
"gold": "D",
"baseline": {
"pred_label": "D",
"correct": true,
"margin": 0.5625,
"scores": {
"A": -12.53125,
"B": -11.625,
"C": -14.40625,
"D": -11.0625,
"E": -12.203125
}
},
"ablated": {
"pred_label": "D",
"correct": true,
"margin": 0.5,
"scores": {
"A": -12.453125,
"B": -11.46875,
"C": -14.265625,
"D": -10.96875,
"E": -12.125
}
},
"flip": false
},
{
"ex_id": "aqua-test-140",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.09375,
"scores": {
"A": -12.171875,
"B": -10.953125,
"C": -12.484375,
"D": -12.046875,
"E": -12.828125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.125,
"scores": {
"A": -12.015625,
"B": -10.8125,
"C": -12.3125,
"D": -11.9375,
"E": -12.71875
}
},
"flip": false
},
{
"ex_id": "aqua-test-141",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.390625,
"scores": {
"A": -15.65625,
"B": -14.0,
"C": -12.3359375,
"D": -12.7265625,
"E": -13.421875
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.359375,
"scores": {
"A": -15.5625,
"B": -13.9375,
"C": -12.3359375,
"D": -12.6953125,
"E": -13.3359375
}
},
"flip": false
},
{
"ex_id": "aqua-test-148",
"gold": "D",
"baseline": {
"pred_label": "D",
"correct": true,
"margin": 0.09375,
"scores": {
"A": -11.84375,
"B": -8.6875,
"C": -9.390625,
"D": -8.59375,
"E": -10.328125
}
},
"ablated": {
"pred_label": "D",
"correct": true,
"margin": 0.015625,
"scores": {
"A": -11.65625,
"B": -8.40625,
"C": -9.1875,
"D": -8.390625,
"E": -10.09375
}
},
"flip": false
},
{
"ex_id": "aqua-test-152",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.65625,
"scores": {
"A": -12.15625,
"B": -11.09375,
"C": -11.75,
"D": -11.765625,
"E": -11.75
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.625,
"scores": {
"A": -12.03125,
"B": -11.015625,
"C": -11.6875,
"D": -11.703125,
"E": -11.640625
}
},
"flip": false
},
{
"ex_id": "aqua-test-167",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 2.28125,
"scores": {
"A": -13.3125,
"B": -10.640625,
"C": -12.921875,
"D": -16.09375,
"E": -14.75
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 2.28125,
"scores": {
"A": -13.34375,
"B": -10.71875,
"C": -13.0,
"D": -16.109375,
"E": -14.8125
}
},
"flip": false
},
{
"ex_id": "aqua-test-178",
"gold": "E",
"baseline": {
"pred_label": "E",
"correct": true,
"margin": 0.4609375,
"scores": {
"A": -13.1953125,
"B": -11.1015625,
"C": -12.8203125,
"D": -12.625,
"E": -10.640625
}
},
"ablated": {
"pred_label": "E",
"correct": true,
"margin": 0.53125,
"scores": {
"A": -13.09375,
"B": -11.0,
"C": -12.75,
"D": -12.4375,
"E": -10.46875
}
},
"flip": false
},
{
"ex_id": "aqua-test-181",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.7734375,
"scores": {
"A": -9.6328125,
"B": -8.859375,
"C": -11.828125,
"D": -11.640625,
"E": -10.6875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.71875,
"scores": {
"A": -9.546875,
"B": -8.828125,
"C": -11.765625,
"D": -11.5859375,
"E": -10.625
}
},
"flip": false
},
{
"ex_id": "aqua-test-183",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.40625,
"scores": {
"A": -12.6484375,
"B": -9.3671875,
"C": -10.7734375,
"D": -13.140625,
"E": -13.125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.359375,
"scores": {
"A": -12.625,
"B": -9.34375,
"C": -10.703125,
"D": -13.125,
"E": -13.109375
}
},
"flip": false
},
{
"ex_id": "aqua-test-189",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.2109375,
"scores": {
"A": -14.5390625,
"B": -11.546875,
"C": -11.8046875,
"D": -11.7578125,
"E": -13.34375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.2421875,
"scores": {
"A": -14.4453125,
"B": -11.5078125,
"C": -11.7890625,
"D": -11.75,
"E": -13.25
}
},
"flip": false
},
{
"ex_id": "aqua-test-190",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.34375,
"scores": {
"A": -13.6015625,
"B": -10.7734375,
"C": -10.4296875,
"D": -13.3671875,
"E": -13.9296875
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.40625,
"scores": {
"A": -13.609375,
"B": -10.765625,
"C": -10.359375,
"D": -13.375,
"E": -14.046875
}
},
"flip": false
},
{
"ex_id": "aqua-test-191",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.953125,
"scores": {
"A": -11.890625,
"B": -10.9375,
"C": -13.640625,
"D": -14.109375,
"E": -13.765625
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.984375,
"scores": {
"A": -11.7734375,
"B": -10.7890625,
"C": -13.40625,
"D": -13.90625,
"E": -13.484375
}
},
"flip": false
},
{
"ex_id": "aqua-test-206",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.1796875,
"scores": {
"A": -11.6171875,
"B": -10.96875,
"C": -11.1484375,
"D": -12.84375,
"E": -14.0
}
},
"ablated": {
"pred_label": "C",
"correct": false,
"margin": -0.0390625,
"scores": {
"A": -11.625,
"B": -11.0625,
"C": -11.0234375,
"D": -12.8203125,
"E": -13.9140625
}
},
"flip": true
},
{
"ex_id": "aqua-test-212",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.5078125,
"scores": {
"A": -11.59375,
"B": -9.609375,
"C": -11.1171875,
"D": -11.7421875,
"E": -12.8359375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.4921875,
"scores": {
"A": -11.53125,
"B": -9.5625,
"C": -11.0546875,
"D": -11.703125,
"E": -12.8046875
}
},
"flip": false
},
{
"ex_id": "aqua-test-223",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 3.09375,
"scores": {
"A": -10.265625,
"B": -7.1484375,
"C": -10.2421875,
"D": -10.921875,
"E": -11.3359375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 3.109375,
"scores": {
"A": -10.3125,
"B": -7.109375,
"C": -10.21875,
"D": -10.9375,
"E": -11.4375
}
},
"flip": false
},
{
"ex_id": "aqua-test-228",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.078125,
"scores": {
"A": -13.3125,
"B": -10.71875,
"C": -10.796875,
"D": -14.0859375,
"E": -14.8984375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.09375,
"scores": {
"A": -13.2421875,
"B": -10.6640625,
"C": -10.7578125,
"D": -14.0,
"E": -14.8125
}
},
"flip": false
},
{
"ex_id": "aqua-test-249",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.828125,
"scores": {
"A": -10.328125,
"B": -8.859375,
"C": -8.03125,
"D": -11.640625,
"E": -11.109375
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.78125,
"scores": {
"A": -10.375,
"B": -8.890625,
"C": -8.109375,
"D": -11.6875,
"E": -11.09375
}
},
"flip": false
},
{
"ex_id": "aqua-test-251",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.078125,
"scores": {
"A": -9.90625,
"B": -9.828125,
"C": -12.40625,
"D": -11.265625,
"E": -10.703125
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.0546875,
"scores": {
"A": -9.7265625,
"B": -9.78125,
"C": -12.34375,
"D": -11.1875,
"E": -10.609375
}
},
"flip": true
}
],
"0.1": [
{
"ex_id": "aqua-test-2",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.2578125,
"scores": {
"A": -11.234375,
"B": -10.2109375,
"C": -13.171875,
"D": -12.4453125,
"E": -10.46875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.390625,
"scores": {
"A": -10.921875,
"B": -9.890625,
"C": -12.859375,
"D": -12.21875,
"E": -10.28125
}
},
"flip": false
},
{
"ex_id": "aqua-test-5",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.953125,
"scores": {
"A": -11.9921875,
"B": -10.9765625,
"C": -12.0390625,
"D": -11.9609375,
"E": -11.9296875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.953125,
"scores": {
"A": -11.90625,
"B": -10.90625,
"C": -12.0625,
"D": -11.875,
"E": -11.859375
}
},
"flip": false
},
{
"ex_id": "aqua-test-9",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.078125,
"scores": {
"A": -11.265625,
"B": -8.890625,
"C": -9.96875,
"D": -12.359375,
"E": -13.9921875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.078125,
"scores": {
"A": -11.203125,
"B": -8.8359375,
"C": -9.9140625,
"D": -12.3125,
"E": -14.171875
}
},
"flip": false
},
{
"ex_id": "aqua-test-15",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.671875,
"scores": {
"A": -11.078125,
"B": -10.40625,
"C": -13.625,
"D": -15.3125,
"E": -13.8125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.78125,
"scores": {
"A": -10.890625,
"B": -10.109375,
"C": -13.390625,
"D": -15.265625,
"E": -13.84375
}
},
"flip": false
},
{
"ex_id": "aqua-test-16",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 2.80078125,
"scores": {
"A": -12.484375,
"B": -10.515625,
"C": -7.71484375,
"D": -12.859375,
"E": -12.8125
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 2.9375,
"scores": {
"A": -12.4296875,
"B": -10.46875,
"C": -7.53125,
"D": -12.7734375,
"E": -12.8125
}
},
"flip": false
},
{
"ex_id": "aqua-test-21",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.765625,
"scores": {
"A": -10.4140625,
"B": -9.6484375,
"C": -12.5546875,
"D": -12.234375,
"E": -11.3828125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.9765625,
"scores": {
"A": -10.34375,
"B": -9.3671875,
"C": -12.4375,
"D": -12.171875,
"E": -11.3203125
}
},
"flip": false
},
{
"ex_id": "aqua-test-25",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.0546875,
"scores": {
"A": -12.953125,
"B": -12.2578125,
"C": -12.203125,
"D": -12.4140625,
"E": -13.6328125
}
},
"ablated": {
"pred_label": "B",
"correct": false,
"margin": -0.046875,
"scores": {
"A": -12.6875,
"B": -11.8125,
"C": -11.859375,
"D": -12.09375,
"E": -13.21875
}
},
"flip": true
},
{
"ex_id": "aqua-test-33",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 1.1875,
"scores": {
"A": -17.28125,
"B": -18.1875,
"C": -16.09375,
"D": -19.15625,
"E": -19.46875
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 1.28125,
"scores": {
"A": -16.671875,
"B": -17.34375,
"C": -15.390625,
"D": -18.234375,
"E": -18.515625
}
},
"flip": false
},
{
"ex_id": "aqua-test-39",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 1.71875,
"scores": {
"A": -10.2265625,
"B": -11.9453125,
"C": -12.1484375,
"D": -14.3125,
"E": -14.015625
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 1.65625,
"scores": {
"A": -10.046875,
"B": -11.703125,
"C": -12.03125,
"D": -13.9375,
"E": -13.5625
}
},
"flip": false
},
{
"ex_id": "aqua-test-47",
"gold": "E",
"baseline": {
"pred_label": "E",
"correct": true,
"margin": 0.203125,
"scores": {
"A": -11.9453125,
"B": -12.5,
"C": -12.1171875,
"D": -13.046875,
"E": -11.7421875
}
},
"ablated": {
"pred_label": "E",
"correct": true,
"margin": 0.21875,
"scores": {
"A": -12.0546875,
"B": -12.609375,
"C": -12.15625,
"D": -13.0859375,
"E": -11.8359375
}
},
"flip": false
},
{
"ex_id": "aqua-test-52",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.0625,
"scores": {
"A": -12.890625,
"B": -9.8515625,
"C": -9.9140625,
"D": -11.515625,
"E": -10.6875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.140625,
"scores": {
"A": -12.75,
"B": -9.65625,
"C": -9.796875,
"D": -11.46875,
"E": -10.59375
}
},
"flip": false
},
{
"ex_id": "aqua-test-57",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.15625,
"scores": {
"A": -13.875,
"B": -12.96875,
"C": -14.359375,
"D": -14.140625,
"E": -13.125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.140625,
"scores": {
"A": -13.75,
"B": -12.84375,
"C": -14.171875,
"D": -13.96875,
"E": -12.984375
}
},
"flip": false
},
{
"ex_id": "aqua-test-68",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.6875,
"scores": {
"A": -11.65625,
"B": -10.96875,
"C": -11.875,
"D": -12.078125,
"E": -12.640625
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.65625,
"scores": {
"A": -11.4609375,
"B": -10.8046875,
"C": -11.7265625,
"D": -12.015625,
"E": -12.390625
}
},
"flip": false
},
{
"ex_id": "aqua-test-78",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 3.078125,
"scores": {
"A": -12.7890625,
"B": -8.3203125,
"C": -11.3984375,
"D": -13.765625,
"E": -13.84375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 3.09375,
"scores": {
"A": -12.65625,
"B": -8.21875,
"C": -11.3125,
"D": -13.59375,
"E": -13.828125
}
},
"flip": false
},
{
"ex_id": "aqua-test-87",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.625,
"scores": {
"A": -9.7890625,
"B": -9.1640625,
"C": -11.234375,
"D": -12.0,
"E": -11.46875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.6640625,
"scores": {
"A": -9.6015625,
"B": -8.9375,
"C": -11.125,
"D": -11.8359375,
"E": -11.296875
}
},
"flip": false
},
{
"ex_id": "aqua-test-100",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.1484375,
"scores": {
"A": -9.265625,
"B": -9.7265625,
"C": -9.1171875,
"D": -10.0546875,
"E": -10.6015625
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.1328125,
"scores": {
"A": -9.125,
"B": -9.609375,
"C": -8.9921875,
"D": -9.9375,
"E": -10.609375
}
},
"flip": false
},
{
"ex_id": "aqua-test-103",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.1484375,
"scores": {
"A": -9.734375,
"B": -8.5234375,
"C": -9.6875,
"D": -11.4375,
"E": -9.671875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.1171875,
"scores": {
"A": -9.796875,
"B": -8.6171875,
"C": -9.796875,
"D": -11.5,
"E": -9.734375
}
},
"flip": false
},
{
"ex_id": "aqua-test-105",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 3.03125,
"scores": {
"A": -11.5,
"B": -12.0234375,
"C": -8.46875,
"D": -13.9765625,
"E": -13.28125
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 3.2421875,
"scores": {
"A": -11.4921875,
"B": -11.9296875,
"C": -8.25,
"D": -13.9375,
"E": -13.296875
}
},
"flip": false
},
{
"ex_id": "aqua-test-111",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.203125,
"scores": {
"A": -9.796875,
"B": -9.2734375,
"C": -9.4765625,
"D": -10.7578125,
"E": -11.4296875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.1953125,
"scores": {
"A": -9.75,
"B": -9.21875,
"C": -9.4140625,
"D": -10.765625,
"E": -11.3671875
}
},
"flip": false
},
{
"ex_id": "aqua-test-116",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.7109375,
"scores": {
"A": -12.0390625,
"B": -9.7421875,
"C": -11.453125,
"D": -11.5390625,
"E": -11.8203125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.875,
"scores": {
"A": -11.765625,
"B": -9.390625,
"C": -11.265625,
"D": -11.28125,
"E": -11.6328125
}
},
"flip": false
},
{
"ex_id": "aqua-test-120",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.3515625,
"scores": {
"A": -12.625,
"B": -10.171875,
"C": -10.5234375,
"D": -11.96875,
"E": -12.625
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.2421875,
"scores": {
"A": -12.46875,
"B": -9.9609375,
"C": -10.203125,
"D": -11.7578125,
"E": -12.4765625
}
},
"flip": false
},
{
"ex_id": "aqua-test-122",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.671875,
"scores": {
"A": -11.09375,
"B": -10.421875,
"C": -13.25,
"D": -13.296875,
"E": -13.5
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.7421875,
"scores": {
"A": -11.1328125,
"B": -10.390625,
"C": -13.265625,
"D": -13.28125,
"E": -13.453125
}
},
"flip": false
},
{
"ex_id": "aqua-test-123",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 1.9296875,
"scores": {
"A": -12.8125,
"B": -13.265625,
"C": -10.09375,
"D": -12.0234375,
"E": -12.84375
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 2.046875,
"scores": {
"A": -12.78125,
"B": -13.40625,
"C": -9.984375,
"D": -12.03125,
"E": -12.78125
}
},
"flip": false
},
{
"ex_id": "aqua-test-125",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.25,
"scores": {
"A": -12.8984375,
"B": -12.015625,
"C": -10.3671875,
"D": -10.6171875,
"E": -11.515625
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.0,
"scores": {
"A": -12.96875,
"B": -12.03125,
"C": -10.6875,
"D": -10.6875,
"E": -11.53125
}
},
"flip": false
},
{
"ex_id": "aqua-test-130",
"gold": "D",
"baseline": {
"pred_label": "D",
"correct": true,
"margin": 0.5625,
"scores": {
"A": -12.53125,
"B": -11.625,
"C": -14.40625,
"D": -11.0625,
"E": -12.203125
}
},
"ablated": {
"pred_label": "D",
"correct": true,
"margin": 0.421875,
"scores": {
"A": -12.3203125,
"B": -11.265625,
"C": -14.0625,
"D": -10.84375,
"E": -11.96875
}
},
"flip": false
},
{
"ex_id": "aqua-test-140",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.09375,
"scores": {
"A": -12.171875,
"B": -10.953125,
"C": -12.484375,
"D": -12.046875,
"E": -12.828125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.15625,
"scores": {
"A": -11.8125,
"B": -10.65625,
"C": -12.09375,
"D": -11.8125,
"E": -12.59375
}
},
"flip": false
},
{
"ex_id": "aqua-test-141",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.390625,
"scores": {
"A": -15.65625,
"B": -14.0,
"C": -12.3359375,
"D": -12.7265625,
"E": -13.421875
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.34375,
"scores": {
"A": -15.421875,
"B": -13.8359375,
"C": -12.265625,
"D": -12.609375,
"E": -13.203125
}
},
"flip": false
},
{
"ex_id": "aqua-test-148",
"gold": "D",
"baseline": {
"pred_label": "D",
"correct": true,
"margin": 0.09375,
"scores": {
"A": -11.84375,
"B": -8.6875,
"C": -9.390625,
"D": -8.59375,
"E": -10.328125
}
},
"ablated": {
"pred_label": "B",
"correct": false,
"margin": -0.015625,
"scores": {
"A": -11.453125,
"B": -8.171875,
"C": -8.96875,
"D": -8.1875,
"E": -9.875
}
},
"flip": true
},
{
"ex_id": "aqua-test-152",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.65625,
"scores": {
"A": -12.15625,
"B": -11.09375,
"C": -11.75,
"D": -11.765625,
"E": -11.75
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.59375,
"scores": {
"A": -11.8828125,
"B": -10.9296875,
"C": -11.59375,
"D": -11.609375,
"E": -11.5234375
}
},
"flip": false
},
{
"ex_id": "aqua-test-167",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 2.28125,
"scores": {
"A": -13.3125,
"B": -10.640625,
"C": -12.921875,
"D": -16.09375,
"E": -14.75
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 2.2421875,
"scores": {
"A": -13.265625,
"B": -10.7109375,
"C": -12.953125,
"D": -16.0,
"E": -14.765625
}
},
"flip": false
},
{
"ex_id": "aqua-test-178",
"gold": "E",
"baseline": {
"pred_label": "E",
"correct": true,
"margin": 0.4609375,
"scores": {
"A": -13.1953125,
"B": -11.1015625,
"C": -12.8203125,
"D": -12.625,
"E": -10.640625
}
},
"ablated": {
"pred_label": "E",
"correct": true,
"margin": 0.5546875,
"scores": {
"A": -12.8984375,
"B": -10.78125,
"C": -12.5234375,
"D": -12.1640625,
"E": -10.2265625
}
},
"flip": false
},
{
"ex_id": "aqua-test-181",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.7734375,
"scores": {
"A": -9.6328125,
"B": -8.859375,
"C": -11.828125,
"D": -11.640625,
"E": -10.6875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.671875,
"scores": {
"A": -9.4296875,
"B": -8.7578125,
"C": -11.671875,
"D": -11.515625,
"E": -10.546875
}
},
"flip": false
},
{
"ex_id": "aqua-test-183",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.40625,
"scores": {
"A": -12.6484375,
"B": -9.3671875,
"C": -10.7734375,
"D": -13.140625,
"E": -13.125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.3125,
"scores": {
"A": -12.546875,
"B": -9.28125,
"C": -10.59375,
"D": -13.078125,
"E": -13.046875
}
},
"flip": false
},
{
"ex_id": "aqua-test-189",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.2109375,
"scores": {
"A": -14.5390625,
"B": -11.546875,
"C": -11.8046875,
"D": -11.7578125,
"E": -13.34375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.28125,
"scores": {
"A": -14.328125,
"B": -11.46875,
"C": -11.75,
"D": -11.75,
"E": -13.125
}
},
"flip": false
},
{
"ex_id": "aqua-test-190",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.34375,
"scores": {
"A": -13.6015625,
"B": -10.7734375,
"C": -10.4296875,
"D": -13.3671875,
"E": -13.9296875
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.421875,
"scores": {
"A": -13.5078125,
"B": -10.6640625,
"C": -10.2421875,
"D": -13.3046875,
"E": -14.078125
}
},
"flip": false
},
{
"ex_id": "aqua-test-191",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.953125,
"scores": {
"A": -11.890625,
"B": -10.9375,
"C": -13.640625,
"D": -14.109375,
"E": -13.765625
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.984375,
"scores": {
"A": -11.59375,
"B": -10.609375,
"C": -13.15625,
"D": -13.6875,
"E": -13.25
}
},
"flip": false
},
{
"ex_id": "aqua-test-206",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.1796875,
"scores": {
"A": -11.6171875,
"B": -10.96875,
"C": -11.1484375,
"D": -12.84375,
"E": -14.0
}
},
"ablated": {
"pred_label": "C",
"correct": false,
"margin": -0.2109375,
"scores": {
"A": -11.5703125,
"B": -11.109375,
"C": -10.8984375,
"D": -12.75,
"E": -13.8203125
}
},
"flip": true
},
{
"ex_id": "aqua-test-212",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.5078125,
"scores": {
"A": -11.59375,
"B": -9.609375,
"C": -11.1171875,
"D": -11.7421875,
"E": -12.8359375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.5,
"scores": {
"A": -11.421875,
"B": -9.4609375,
"C": -10.9609375,
"D": -11.6328125,
"E": -12.75
}
},
"flip": false
},
{
"ex_id": "aqua-test-223",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 3.09375,
"scores": {
"A": -10.265625,
"B": -7.1484375,
"C": -10.2421875,
"D": -10.921875,
"E": -11.3359375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 3.1328125,
"scores": {
"A": -10.3359375,
"B": -7.0625,
"C": -10.1953125,
"D": -10.9296875,
"E": -11.5
}
},
"flip": false
},
{
"ex_id": "aqua-test-228",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.078125,
"scores": {
"A": -13.3125,
"B": -10.71875,
"C": -10.796875,
"D": -14.0859375,
"E": -14.8984375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.1171875,
"scores": {
"A": -13.1171875,
"B": -10.5625,
"C": -10.6796875,
"D": -13.875,
"E": -14.640625
}
},
"flip": false
},
{
"ex_id": "aqua-test-249",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.828125,
"scores": {
"A": -10.328125,
"B": -8.859375,
"C": -8.03125,
"D": -11.640625,
"E": -11.109375
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.765625,
"scores": {
"A": -10.375,
"B": -8.890625,
"C": -8.125,
"D": -11.703125,
"E": -11.0625
}
},
"flip": false
},
{
"ex_id": "aqua-test-251",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.078125,
"scores": {
"A": -9.90625,
"B": -9.828125,
"C": -12.40625,
"D": -11.265625,
"E": -10.703125
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.171875,
"scores": {
"A": -9.4921875,
"B": -9.6640625,
"C": -12.203125,
"D": -11.0390625,
"E": -10.453125
}
},
"flip": true
}
],
"0.2": [
{
"ex_id": "aqua-test-2",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.2578125,
"scores": {
"A": -11.234375,
"B": -10.2109375,
"C": -13.171875,
"D": -12.4453125,
"E": -10.46875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.578125,
"scores": {
"A": -10.390625,
"B": -9.359375,
"C": -12.34375,
"D": -11.828125,
"E": -9.9375
}
},
"flip": false
},
{
"ex_id": "aqua-test-5",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.953125,
"scores": {
"A": -11.9921875,
"B": -10.9765625,
"C": -12.0390625,
"D": -11.9609375,
"E": -11.9296875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.953125,
"scores": {
"A": -11.6171875,
"B": -10.640625,
"C": -11.796875,
"D": -11.6171875,
"E": -11.59375
}
},
"flip": false
},
{
"ex_id": "aqua-test-9",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.078125,
"scores": {
"A": -11.265625,
"B": -8.890625,
"C": -9.96875,
"D": -12.359375,
"E": -13.9921875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.0859375,
"scores": {
"A": -10.9921875,
"B": -8.6796875,
"C": -9.765625,
"D": -12.1640625,
"E": -14.25
}
},
"flip": false
},
{
"ex_id": "aqua-test-15",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.671875,
"scores": {
"A": -11.078125,
"B": -10.40625,
"C": -13.625,
"D": -15.3125,
"E": -13.8125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.8125,
"scores": {
"A": -10.46875,
"B": -9.65625,
"C": -12.953125,
"D": -15.015625,
"E": -13.765625
}
},
"flip": false
},
{
"ex_id": "aqua-test-16",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 2.80078125,
"scores": {
"A": -12.484375,
"B": -10.515625,
"C": -7.71484375,
"D": -12.859375,
"E": -12.8125
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 3.0546875,
"scores": {
"A": -11.90625,
"B": -10.0625,
"C": -7.0078125,
"D": -12.2265625,
"E": -12.40625
}
},
"flip": false
},
{
"ex_id": "aqua-test-21",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.765625,
"scores": {
"A": -10.4140625,
"B": -9.6484375,
"C": -12.5546875,
"D": -12.234375,
"E": -11.3828125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.171875,
"scores": {
"A": -10.15625,
"B": -8.984375,
"C": -12.1875,
"D": -12.0546875,
"E": -11.234375
}
},
"flip": false
},
{
"ex_id": "aqua-test-25",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.0546875,
"scores": {
"A": -12.953125,
"B": -12.2578125,
"C": -12.203125,
"D": -12.4140625,
"E": -13.6328125
}
},
"ablated": {
"pred_label": "B",
"correct": false,
"margin": -0.109375,
"scores": {
"A": -12.09375,
"B": -11.15625,
"C": -11.265625,
"D": -11.546875,
"E": -12.546875
}
},
"flip": true
},
{
"ex_id": "aqua-test-33",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 1.1875,
"scores": {
"A": -17.28125,
"B": -18.1875,
"C": -16.09375,
"D": -19.15625,
"E": -19.46875
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 1.78125,
"scores": {
"A": -15.453125,
"B": -15.421875,
"C": -13.640625,
"D": -16.265625,
"E": -16.453125
}
},
"flip": false
},
{
"ex_id": "aqua-test-39",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 1.71875,
"scores": {
"A": -10.2265625,
"B": -11.9453125,
"C": -12.1484375,
"D": -14.3125,
"E": -14.015625
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 1.59375,
"scores": {
"A": -9.765625,
"B": -11.359375,
"C": -11.796875,
"D": -13.546875,
"E": -13.078125
}
},
"flip": false
},
{
"ex_id": "aqua-test-47",
"gold": "E",
"baseline": {
"pred_label": "E",
"correct": true,
"margin": 0.203125,
"scores": {
"A": -11.9453125,
"B": -12.5,
"C": -12.1171875,
"D": -13.046875,
"E": -11.7421875
}
},
"ablated": {
"pred_label": "E",
"correct": true,
"margin": 0.1953125,
"scores": {
"A": -11.9921875,
"B": -12.65625,
"C": -12.046875,
"D": -12.9765625,
"E": -11.796875
}
},
"flip": false
},
{
"ex_id": "aqua-test-52",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.0625,
"scores": {
"A": -12.890625,
"B": -9.8515625,
"C": -9.9140625,
"D": -11.515625,
"E": -10.6875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.15625,
"scores": {
"A": -12.46875,
"B": -9.359375,
"C": -9.515625,
"D": -11.3125,
"E": -10.484375
}
},
"flip": false
},
{
"ex_id": "aqua-test-57",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.15625,
"scores": {
"A": -13.875,
"B": -12.96875,
"C": -14.359375,
"D": -14.140625,
"E": -13.125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.171875,
"scores": {
"A": -13.4375,
"B": -12.546875,
"C": -13.8125,
"D": -13.65625,
"E": -12.71875
}
},
"flip": false
},
{
"ex_id": "aqua-test-68",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.6875,
"scores": {
"A": -11.65625,
"B": -10.96875,
"C": -11.875,
"D": -12.078125,
"E": -12.640625
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.6640625,
"scores": {
"A": -11.21875,
"B": -10.5546875,
"C": -11.484375,
"D": -11.875,
"E": -12.0625
}
},
"flip": false
},
{
"ex_id": "aqua-test-78",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 3.078125,
"scores": {
"A": -12.7890625,
"B": -8.3203125,
"C": -11.3984375,
"D": -13.765625,
"E": -13.84375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 3.125,
"scores": {
"A": -12.3984375,
"B": -8.03125,
"C": -11.15625,
"D": -13.390625,
"E": -13.75
}
},
"flip": false
},
{
"ex_id": "aqua-test-87",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.625,
"scores": {
"A": -9.7890625,
"B": -9.1640625,
"C": -11.234375,
"D": -12.0,
"E": -11.46875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.625,
"scores": {
"A": -9.296875,
"B": -8.671875,
"C": -10.9375,
"D": -11.5625,
"E": -11.015625
}
},
"flip": false
},
{
"ex_id": "aqua-test-100",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.1484375,
"scores": {
"A": -9.265625,
"B": -9.7265625,
"C": -9.1171875,
"D": -10.0546875,
"E": -10.6015625
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.1015625,
"scores": {
"A": -8.734375,
"B": -9.21875,
"C": -8.6328125,
"D": -9.640625,
"E": -10.421875
}
},
"flip": false
},
{
"ex_id": "aqua-test-103",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.1484375,
"scores": {
"A": -9.734375,
"B": -8.5234375,
"C": -9.6875,
"D": -11.4375,
"E": -9.671875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.1328125,
"scores": {
"A": -9.65625,
"B": -8.5234375,
"C": -9.734375,
"D": -11.484375,
"E": -9.75
}
},
"flip": false
},
{
"ex_id": "aqua-test-105",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 3.03125,
"scores": {
"A": -11.5,
"B": -12.0234375,
"C": -8.46875,
"D": -13.9765625,
"E": -13.28125
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 3.38671875,
"scores": {
"A": -11.265625,
"B": -11.75,
"C": -7.87890625,
"D": -13.8125,
"E": -13.2265625
}
},
"flip": false
},
{
"ex_id": "aqua-test-111",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.203125,
"scores": {
"A": -9.796875,
"B": -9.2734375,
"C": -9.4765625,
"D": -10.7578125,
"E": -11.4296875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.1796875,
"scores": {
"A": -9.5859375,
"B": -9.0859375,
"C": -9.265625,
"D": -10.625,
"E": -11.1953125
}
},
"flip": false
},
{
"ex_id": "aqua-test-116",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.7109375,
"scores": {
"A": -12.0390625,
"B": -9.7421875,
"C": -11.453125,
"D": -11.5390625,
"E": -11.8203125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 2.0,
"scores": {
"A": -11.296875,
"B": -8.984375,
"C": -11.015625,
"D": -10.984375,
"E": -11.359375
}
},
"flip": false
},
{
"ex_id": "aqua-test-120",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.3515625,
"scores": {
"A": -12.625,
"B": -10.171875,
"C": -10.5234375,
"D": -11.96875,
"E": -12.625
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.21875,
"scores": {
"A": -12.203125,
"B": -9.6171875,
"C": -9.8359375,
"D": -11.46875,
"E": -12.3125
}
},
"flip": false
},
{
"ex_id": "aqua-test-122",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.671875,
"scores": {
"A": -11.09375,
"B": -10.421875,
"C": -13.25,
"D": -13.296875,
"E": -13.5
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.734375,
"scores": {
"A": -10.984375,
"B": -10.25,
"C": -13.140625,
"D": -13.140625,
"E": -13.328125
}
},
"flip": false
},
{
"ex_id": "aqua-test-123",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 1.9296875,
"scores": {
"A": -12.8125,
"B": -13.265625,
"C": -10.09375,
"D": -12.0234375,
"E": -12.84375
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 2.1328125,
"scores": {
"A": -12.390625,
"B": -13.234375,
"C": -9.671875,
"D": -11.8046875,
"E": -12.515625
}
},
"flip": false
},
{
"ex_id": "aqua-test-125",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.25,
"scores": {
"A": -12.8984375,
"B": -12.015625,
"C": -10.3671875,
"D": -10.6171875,
"E": -11.515625
}
},
"ablated": {
"pred_label": "D",
"correct": false,
"margin": -0.015625,
"scores": {
"A": -12.59375,
"B": -11.640625,
"C": -10.5,
"D": -10.484375,
"E": -11.25
}
},
"flip": true
},
{
"ex_id": "aqua-test-130",
"gold": "D",
"baseline": {
"pred_label": "D",
"correct": true,
"margin": 0.5625,
"scores": {
"A": -12.53125,
"B": -11.625,
"C": -14.40625,
"D": -11.0625,
"E": -12.203125
}
},
"ablated": {
"pred_label": "D",
"correct": true,
"margin": 0.296875,
"scores": {
"A": -11.875,
"B": -10.78125,
"C": -13.421875,
"D": -10.484375,
"E": -11.34375
}
},
"flip": false
},
{
"ex_id": "aqua-test-140",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.09375,
"scores": {
"A": -12.171875,
"B": -10.953125,
"C": -12.484375,
"D": -12.046875,
"E": -12.828125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.0078125,
"scores": {
"A": -11.3984375,
"B": -10.390625,
"C": -11.703125,
"D": -11.6015625,
"E": -12.421875
}
},
"flip": false
},
{
"ex_id": "aqua-test-141",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.390625,
"scores": {
"A": -15.65625,
"B": -14.0,
"C": -12.3359375,
"D": -12.7265625,
"E": -13.421875
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.3671875,
"scores": {
"A": -15.03125,
"B": -13.484375,
"C": -11.953125,
"D": -12.3203125,
"E": -12.7890625
}
},
"flip": false
},
{
"ex_id": "aqua-test-148",
"gold": "D",
"baseline": {
"pred_label": "D",
"correct": true,
"margin": 0.09375,
"scores": {
"A": -11.84375,
"B": -8.6875,
"C": -9.390625,
"D": -8.59375,
"E": -10.328125
}
},
"ablated": {
"pred_label": "D",
"correct": true,
"margin": 0.015625,
"scores": {
"A": -11.125,
"B": -7.8671875,
"C": -8.59375,
"D": -7.8515625,
"E": -9.5
}
},
"flip": false
},
{
"ex_id": "aqua-test-152",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.65625,
"scores": {
"A": -12.15625,
"B": -11.09375,
"C": -11.75,
"D": -11.765625,
"E": -11.75
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.546875,
"scores": {
"A": -11.421875,
"B": -10.640625,
"C": -11.1875,
"D": -11.34375,
"E": -11.21875
}
},
"flip": false
},
{
"ex_id": "aqua-test-167",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 2.28125,
"scores": {
"A": -13.3125,
"B": -10.640625,
"C": -12.921875,
"D": -16.09375,
"E": -14.75
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 2.1171875,
"scores": {
"A": -12.859375,
"B": -10.4921875,
"C": -12.609375,
"D": -15.53125,
"E": -14.4765625
}
},
"flip": false
},
{
"ex_id": "aqua-test-178",
"gold": "E",
"baseline": {
"pred_label": "E",
"correct": true,
"margin": 0.4609375,
"scores": {
"A": -13.1953125,
"B": -11.1015625,
"C": -12.8203125,
"D": -12.625,
"E": -10.640625
}
},
"ablated": {
"pred_label": "E",
"correct": true,
"margin": 0.515625,
"scores": {
"A": -12.34375,
"B": -10.1875,
"C": -11.78125,
"D": -11.53125,
"E": -9.671875
}
},
"flip": false
},
{
"ex_id": "aqua-test-181",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.7734375,
"scores": {
"A": -9.6328125,
"B": -8.859375,
"C": -11.828125,
"D": -11.640625,
"E": -10.6875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.625,
"scores": {
"A": -9.0,
"B": -8.375,
"C": -11.2734375,
"D": -11.1875,
"E": -10.2265625
}
},
"flip": false
},
{
"ex_id": "aqua-test-183",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.40625,
"scores": {
"A": -12.6484375,
"B": -9.3671875,
"C": -10.7734375,
"D": -13.140625,
"E": -13.125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.140625,
"scores": {
"A": -12.3125,
"B": -9.15625,
"C": -10.296875,
"D": -12.921875,
"E": -12.953125
}
},
"flip": false
},
{
"ex_id": "aqua-test-189",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.2109375,
"scores": {
"A": -14.5390625,
"B": -11.546875,
"C": -11.8046875,
"D": -11.7578125,
"E": -13.34375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.3125,
"scores": {
"A": -14.015625,
"B": -11.3125,
"C": -11.625,
"D": -11.765625,
"E": -12.8671875
}
},
"flip": false
},
{
"ex_id": "aqua-test-190",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.34375,
"scores": {
"A": -13.6015625,
"B": -10.7734375,
"C": -10.4296875,
"D": -13.3671875,
"E": -13.9296875
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.359375,
"scores": {
"A": -13.125,
"B": -10.3125,
"C": -9.953125,
"D": -13.0625,
"E": -13.9453125
}
},
"flip": false
},
{
"ex_id": "aqua-test-191",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.953125,
"scores": {
"A": -11.890625,
"B": -10.9375,
"C": -13.640625,
"D": -14.109375,
"E": -13.765625
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.859375,
"scores": {
"A": -11.03125,
"B": -10.171875,
"C": -12.625,
"D": -13.140625,
"E": -12.796875
}
},
"flip": false
},
{
"ex_id": "aqua-test-206",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.1796875,
"scores": {
"A": -11.6171875,
"B": -10.96875,
"C": -11.1484375,
"D": -12.84375,
"E": -14.0
}
},
"ablated": {
"pred_label": "C",
"correct": false,
"margin": -0.390625,
"scores": {
"A": -11.2578125,
"B": -10.9609375,
"C": -10.5703125,
"D": -12.4375,
"E": -13.6015625
}
},
"flip": true
},
{
"ex_id": "aqua-test-212",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.5078125,
"scores": {
"A": -11.59375,
"B": -9.609375,
"C": -11.1171875,
"D": -11.7421875,
"E": -12.8359375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.515625,
"scores": {
"A": -11.078125,
"B": -9.1875,
"C": -10.703125,
"D": -11.40625,
"E": -12.609375
}
},
"flip": false
},
{
"ex_id": "aqua-test-223",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 3.09375,
"scores": {
"A": -10.265625,
"B": -7.1484375,
"C": -10.2421875,
"D": -10.921875,
"E": -11.3359375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 3.0859375,
"scores": {
"A": -10.2109375,
"B": -6.890625,
"C": -9.9765625,
"D": -10.75,
"E": -11.4296875
}
},
"flip": false
},
{
"ex_id": "aqua-test-228",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.078125,
"scores": {
"A": -13.3125,
"B": -10.71875,
"C": -10.796875,
"D": -14.0859375,
"E": -14.8984375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.1640625,
"scores": {
"A": -12.7578125,
"B": -10.2578125,
"C": -10.421875,
"D": -13.53125,
"E": -14.1953125
}
},
"flip": false
},
{
"ex_id": "aqua-test-249",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.828125,
"scores": {
"A": -10.328125,
"B": -8.859375,
"C": -8.03125,
"D": -11.640625,
"E": -11.109375
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.71875,
"scores": {
"A": -10.2578125,
"B": -8.7734375,
"C": -8.0546875,
"D": -11.65625,
"E": -10.9140625
}
},
"flip": false
},
{
"ex_id": "aqua-test-251",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.078125,
"scores": {
"A": -9.90625,
"B": -9.828125,
"C": -12.40625,
"D": -11.265625,
"E": -10.703125
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.3671875,
"scores": {
"A": -8.8984375,
"B": -9.265625,
"C": -11.75,
"D": -10.6015625,
"E": -9.9921875
}
},
"flip": true
}
],
"0.5": [
{
"ex_id": "aqua-test-2",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.2578125,
"scores": {
"A": -11.234375,
"B": -10.2109375,
"C": -13.171875,
"D": -12.4453125,
"E": -10.46875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.6796875,
"scores": {
"A": -8.2890625,
"B": -7.609375,
"C": -10.546875,
"D": -11.421875,
"E": -9.6484375
}
},
"flip": false
},
{
"ex_id": "aqua-test-5",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.953125,
"scores": {
"A": -11.9921875,
"B": -10.9765625,
"C": -12.0390625,
"D": -11.9609375,
"E": -11.9296875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.5,
"scores": {
"A": -9.90625,
"B": -9.3125,
"C": -9.8125,
"D": -10.203125,
"E": -10.078125
}
},
"flip": false
},
{
"ex_id": "aqua-test-9",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.078125,
"scores": {
"A": -11.265625,
"B": -8.890625,
"C": -9.96875,
"D": -12.359375,
"E": -13.9921875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.265625,
"scores": {
"A": -9.953125,
"B": -8.109375,
"C": -9.375,
"D": -11.640625,
"E": -13.875
}
},
"flip": false
},
{
"ex_id": "aqua-test-15",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.671875,
"scores": {
"A": -11.078125,
"B": -10.40625,
"C": -13.625,
"D": -15.3125,
"E": -13.8125
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.125,
"scores": {
"A": -8.296875,
"B": -8.421875,
"C": -10.796875,
"D": -12.859375,
"E": -12.9375
}
},
"flip": true
},
{
"ex_id": "aqua-test-16",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 2.80078125,
"scores": {
"A": -12.484375,
"B": -10.515625,
"C": -7.71484375,
"D": -12.859375,
"E": -12.8125
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 3.44140625,
"scores": {
"A": -8.984375,
"B": -8.984375,
"C": -5.54296875,
"D": -9.96875,
"E": -10.765625
}
},
"flip": false
},
{
"ex_id": "aqua-test-21",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.765625,
"scores": {
"A": -10.4140625,
"B": -9.6484375,
"C": -12.5546875,
"D": -12.234375,
"E": -11.3828125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.3984375,
"scores": {
"A": -9.015625,
"B": -7.6171875,
"C": -11.046875,
"D": -11.390625,
"E": -10.84375
}
},
"flip": false
},
{
"ex_id": "aqua-test-25",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.0546875,
"scores": {
"A": -12.953125,
"B": -12.2578125,
"C": -12.203125,
"D": -12.4140625,
"E": -13.6328125
}
},
"ablated": {
"pred_label": "B",
"correct": false,
"margin": -0.140625,
"scores": {
"A": -10.734375,
"B": -9.5,
"C": -9.640625,
"D": -10.234375,
"E": -10.859375
}
},
"flip": true
},
{
"ex_id": "aqua-test-33",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 1.1875,
"scores": {
"A": -17.28125,
"B": -18.1875,
"C": -16.09375,
"D": -19.15625,
"E": -19.46875
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 1.453125,
"scores": {
"A": -11.296875,
"B": -10.015625,
"C": -8.5625,
"D": -11.7890625,
"E": -11.21875
}
},
"flip": false
},
{
"ex_id": "aqua-test-39",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 1.71875,
"scores": {
"A": -10.2265625,
"B": -11.9453125,
"C": -12.1484375,
"D": -14.3125,
"E": -14.015625
}
},
"ablated": {
"pred_label": "A",
"correct": true,
"margin": 0.9375,
"scores": {
"A": -8.546875,
"B": -9.484375,
"C": -9.75,
"D": -11.703125,
"E": -10.671875
}
},
"flip": false
},
{
"ex_id": "aqua-test-47",
"gold": "E",
"baseline": {
"pred_label": "E",
"correct": true,
"margin": 0.203125,
"scores": {
"A": -11.9453125,
"B": -12.5,
"C": -12.1171875,
"D": -13.046875,
"E": -11.7421875
}
},
"ablated": {
"pred_label": "C",
"correct": false,
"margin": -0.15625,
"scores": {
"A": -12.296875,
"B": -13.421875,
"C": -12.203125,
"D": -13.125,
"E": -12.359375
}
},
"flip": true
},
{
"ex_id": "aqua-test-52",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.0625,
"scores": {
"A": -12.890625,
"B": -9.8515625,
"C": -9.9140625,
"D": -11.515625,
"E": -10.6875
}
},
"ablated": {
"pred_label": "C",
"correct": false,
"margin": -0.140625,
"scores": {
"A": -10.453125,
"B": -7.85546875,
"C": -7.71484375,
"D": -10.40625,
"E": -10.203125
}
},
"flip": true
},
{
"ex_id": "aqua-test-57",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.15625,
"scores": {
"A": -13.875,
"B": -12.96875,
"C": -14.359375,
"D": -14.140625,
"E": -13.125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.3515625,
"scores": {
"A": -12.3203125,
"B": -11.6171875,
"C": -12.796875,
"D": -12.8359375,
"E": -11.96875
}
},
"flip": false
},
{
"ex_id": "aqua-test-68",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.6875,
"scores": {
"A": -11.65625,
"B": -10.96875,
"C": -11.875,
"D": -12.078125,
"E": -12.640625
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.578125,
"scores": {
"A": -10.34375,
"B": -9.65625,
"C": -10.234375,
"D": -11.578125,
"E": -11.234375
}
},
"flip": false
},
{
"ex_id": "aqua-test-78",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 3.078125,
"scores": {
"A": -12.7890625,
"B": -8.3203125,
"C": -11.3984375,
"D": -13.765625,
"E": -13.84375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 3.0625,
"scores": {
"A": -11.1640625,
"B": -7.296875,
"C": -10.359375,
"D": -12.5546875,
"E": -13.0703125
}
},
"flip": false
},
{
"ex_id": "aqua-test-87",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.625,
"scores": {
"A": -9.7890625,
"B": -9.1640625,
"C": -11.234375,
"D": -12.0,
"E": -11.46875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.171875,
"scores": {
"A": -8.96875,
"B": -8.796875,
"C": -10.609375,
"D": -11.2421875,
"E": -10.7421875
}
},
"flip": false
},
{
"ex_id": "aqua-test-100",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.1484375,
"scores": {
"A": -9.265625,
"B": -9.7265625,
"C": -9.1171875,
"D": -10.0546875,
"E": -10.6015625
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.234375,
"scores": {
"A": -7.7421875,
"B": -8.1015625,
"C": -7.5078125,
"D": -8.6875,
"E": -9.671875
}
},
"flip": false
},
{
"ex_id": "aqua-test-103",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.1484375,
"scores": {
"A": -9.734375,
"B": -8.5234375,
"C": -9.6875,
"D": -11.4375,
"E": -9.671875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.71875,
"scores": {
"A": -8.4921875,
"B": -7.7734375,
"C": -9.0703125,
"D": -11.109375,
"E": -9.640625
}
},
"flip": false
},
{
"ex_id": "aqua-test-105",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 3.03125,
"scores": {
"A": -11.5,
"B": -12.0234375,
"C": -8.46875,
"D": -13.9765625,
"E": -13.28125
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 2.9140625,
"scores": {
"A": -9.4140625,
"B": -11.140625,
"C": -6.5,
"D": -13.234375,
"E": -12.96875
}
},
"flip": false
},
{
"ex_id": "aqua-test-111",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.203125,
"scores": {
"A": -9.796875,
"B": -9.2734375,
"C": -9.4765625,
"D": -10.7578125,
"E": -11.4296875
}
},
"ablated": {
"pred_label": "C",
"correct": false,
"margin": -0.3984375,
"scores": {
"A": -8.8828125,
"B": -8.9140625,
"C": -8.515625,
"D": -9.6953125,
"E": -10.2109375
}
},
"flip": true
},
{
"ex_id": "aqua-test-116",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.7109375,
"scores": {
"A": -12.0390625,
"B": -9.7421875,
"C": -11.453125,
"D": -11.5390625,
"E": -11.8203125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.6875,
"scores": {
"A": -9.0234375,
"B": -7.3359375,
"C": -10.3359375,
"D": -10.328125,
"E": -10.328125
}
},
"flip": false
},
{
"ex_id": "aqua-test-120",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.3515625,
"scores": {
"A": -12.625,
"B": -10.171875,
"C": -10.5234375,
"D": -11.96875,
"E": -12.625
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.015625,
"scores": {
"A": -10.9296875,
"B": -8.8203125,
"C": -8.8359375,
"D": -10.6171875,
"E": -11.7109375
}
},
"flip": false
},
{
"ex_id": "aqua-test-122",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.671875,
"scores": {
"A": -11.09375,
"B": -10.421875,
"C": -13.25,
"D": -13.296875,
"E": -13.5
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.296875,
"scores": {
"A": -9.921875,
"B": -9.625,
"C": -12.3203125,
"D": -12.46875,
"E": -13.0625
}
},
"flip": false
},
{
"ex_id": "aqua-test-123",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 1.9296875,
"scores": {
"A": -12.8125,
"B": -13.265625,
"C": -10.09375,
"D": -12.0234375,
"E": -12.84375
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 1.0390625,
"scores": {
"A": -9.3984375,
"B": -10.296875,
"C": -8.359375,
"D": -9.796875,
"E": -10.875
}
},
"flip": false
},
{
"ex_id": "aqua-test-125",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.25,
"scores": {
"A": -12.8984375,
"B": -12.015625,
"C": -10.3671875,
"D": -10.6171875,
"E": -11.515625
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 1.2109375,
"scores": {
"A": -9.1640625,
"B": -8.953125,
"C": -7.7421875,
"D": -9.046875,
"E": -9.6796875
}
},
"flip": false
},
{
"ex_id": "aqua-test-130",
"gold": "D",
"baseline": {
"pred_label": "D",
"correct": true,
"margin": 0.5625,
"scores": {
"A": -12.53125,
"B": -11.625,
"C": -14.40625,
"D": -11.0625,
"E": -12.203125
}
},
"ablated": {
"pred_label": "E",
"correct": false,
"margin": -0.578125,
"scores": {
"A": -9.3125,
"B": -8.5,
"C": -9.578125,
"D": -8.78125,
"E": -8.203125
}
},
"flip": true
},
{
"ex_id": "aqua-test-140",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.09375,
"scores": {
"A": -12.171875,
"B": -10.953125,
"C": -12.484375,
"D": -12.046875,
"E": -12.828125
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.03125,
"scores": {
"A": -9.25,
"B": -9.28125,
"C": -9.96875,
"D": -10.46875,
"E": -11.1875
}
},
"flip": true
},
{
"ex_id": "aqua-test-141",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.390625,
"scores": {
"A": -15.65625,
"B": -14.0,
"C": -12.3359375,
"D": -12.7265625,
"E": -13.421875
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.390625,
"scores": {
"A": -13.2265625,
"B": -11.828125,
"C": -10.125,
"D": -10.515625,
"E": -10.65625
}
},
"flip": false
},
{
"ex_id": "aqua-test-148",
"gold": "D",
"baseline": {
"pred_label": "D",
"correct": true,
"margin": 0.09375,
"scores": {
"A": -11.84375,
"B": -8.6875,
"C": -9.390625,
"D": -8.59375,
"E": -10.328125
}
},
"ablated": {
"pred_label": "D",
"correct": true,
"margin": 0.1875,
"scores": {
"A": -11.15625,
"B": -8.8125,
"C": -8.5625,
"D": -8.375,
"E": -9.453125
}
},
"flip": false
},
{
"ex_id": "aqua-test-152",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.65625,
"scores": {
"A": -12.15625,
"B": -11.09375,
"C": -11.75,
"D": -11.765625,
"E": -11.75
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.78125,
"scores": {
"A": -9.078125,
"B": -9.859375,
"C": -10.28125,
"D": -9.53125,
"E": -9.421875
}
},
"flip": true
},
{
"ex_id": "aqua-test-167",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 2.28125,
"scores": {
"A": -13.3125,
"B": -10.640625,
"C": -12.921875,
"D": -16.09375,
"E": -14.75
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.3203125,
"scores": {
"A": -10.1953125,
"B": -8.875,
"C": -10.5625,
"D": -12.84375,
"E": -12.578125
}
},
"flip": false
},
{
"ex_id": "aqua-test-178",
"gold": "E",
"baseline": {
"pred_label": "E",
"correct": true,
"margin": 0.4609375,
"scores": {
"A": -13.1953125,
"B": -11.1015625,
"C": -12.8203125,
"D": -12.625,
"E": -10.640625
}
},
"ablated": {
"pred_label": "B",
"correct": false,
"margin": -1.3671875,
"scores": {
"A": -10.4296875,
"B": -8.125,
"C": -8.296875,
"D": -10.5546875,
"E": -9.4921875
}
},
"flip": true
},
{
"ex_id": "aqua-test-181",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.7734375,
"scores": {
"A": -9.6328125,
"B": -8.859375,
"C": -11.828125,
"D": -11.640625,
"E": -10.6875
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.28125,
"scores": {
"A": -8.0234375,
"B": -7.7421875,
"C": -9.6484375,
"D": -10.40625,
"E": -9.8203125
}
},
"flip": false
},
{
"ex_id": "aqua-test-183",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.40625,
"scores": {
"A": -12.6484375,
"B": -9.3671875,
"C": -10.7734375,
"D": -13.140625,
"E": -13.125
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.8203125,
"scores": {
"A": -11.0703125,
"B": -8.109375,
"C": -8.9296875,
"D": -12.046875,
"E": -12.53125
}
},
"flip": false
},
{
"ex_id": "aqua-test-189",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.2109375,
"scores": {
"A": -14.5390625,
"B": -11.546875,
"C": -11.8046875,
"D": -11.7578125,
"E": -13.34375
}
},
"ablated": {
"pred_label": "C",
"correct": false,
"margin": -0.4921875,
"scores": {
"A": -11.890625,
"B": -10.34375,
"C": -9.8515625,
"D": -10.515625,
"E": -10.8125
}
},
"flip": true
},
{
"ex_id": "aqua-test-190",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.34375,
"scores": {
"A": -13.6015625,
"B": -10.7734375,
"C": -10.4296875,
"D": -13.3671875,
"E": -13.9296875
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.4921875,
"scores": {
"A": -10.703125,
"B": -9.3515625,
"C": -8.859375,
"D": -11.4765625,
"E": -12.3984375
}
},
"flip": false
},
{
"ex_id": "aqua-test-191",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.953125,
"scores": {
"A": -11.890625,
"B": -10.9375,
"C": -13.640625,
"D": -14.109375,
"E": -13.765625
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.03125,
"scores": {
"A": -8.28125,
"B": -8.3125,
"C": -10.6328125,
"D": -10.890625,
"E": -11.5234375
}
},
"flip": true
},
{
"ex_id": "aqua-test-206",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.1796875,
"scores": {
"A": -11.6171875,
"B": -10.96875,
"C": -11.1484375,
"D": -12.84375,
"E": -14.0
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.109375,
"scores": {
"A": -10.09375,
"B": -9.984375,
"C": -10.5625,
"D": -11.59375,
"E": -13.203125
}
},
"flip": false
},
{
"ex_id": "aqua-test-212",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.5078125,
"scores": {
"A": -11.59375,
"B": -9.609375,
"C": -11.1171875,
"D": -11.7421875,
"E": -12.8359375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 1.359375,
"scores": {
"A": -9.8125,
"B": -8.453125,
"C": -10.125,
"D": -10.421875,
"E": -12.0
}
},
"flip": false
},
{
"ex_id": "aqua-test-223",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 3.09375,
"scores": {
"A": -10.265625,
"B": -7.1484375,
"C": -10.2421875,
"D": -10.921875,
"E": -11.3359375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 2.53125,
"scores": {
"A": -10.0625,
"B": -7.328125,
"C": -9.859375,
"D": -10.203125,
"E": -11.421875
}
},
"flip": false
},
{
"ex_id": "aqua-test-228",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.078125,
"scores": {
"A": -13.3125,
"B": -10.71875,
"C": -10.796875,
"D": -14.0859375,
"E": -14.8984375
}
},
"ablated": {
"pred_label": "B",
"correct": true,
"margin": 0.484375,
"scores": {
"A": -10.3125,
"B": -7.9140625,
"C": -8.3984375,
"D": -11.421875,
"E": -11.21875
}
},
"flip": false
},
{
"ex_id": "aqua-test-249",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.828125,
"scores": {
"A": -10.328125,
"B": -8.859375,
"C": -8.03125,
"D": -11.640625,
"E": -11.109375
}
},
"ablated": {
"pred_label": "C",
"correct": true,
"margin": 0.03125,
"scores": {
"A": -9.078125,
"B": -7.50390625,
"C": -7.47265625,
"D": -10.890625,
"E": -9.953125
}
},
"flip": false
},
{
"ex_id": "aqua-test-251",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.078125,
"scores": {
"A": -9.90625,
"B": -9.828125,
"C": -12.40625,
"D": -11.265625,
"E": -10.703125
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.390625,
"scores": {
"A": -8.671875,
"B": -9.0625,
"C": -11.2734375,
"D": -10.6015625,
"E": -9.640625
}
},
"flip": true
}
],
"1.0": [
{
"ex_id": "aqua-test-2",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.2578125,
"scores": {
"A": -11.234375,
"B": -10.2109375,
"C": -13.171875,
"D": -12.4453125,
"E": -10.46875
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -7.953125,
"scores": {
"A": -6.0625,
"B": -14.015625,
"C": -17.125,
"D": -15.2734375,
"E": -15.640625
}
},
"flip": true
},
{
"ex_id": "aqua-test-5",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.953125,
"scores": {
"A": -11.9921875,
"B": -10.9765625,
"C": -12.0390625,
"D": -11.9609375,
"E": -11.9296875
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.1640625,
"scores": {
"A": -7.59375,
"B": -9.7578125,
"C": -11.0234375,
"D": -9.1953125,
"E": -10.0625
}
},
"flip": true
},
{
"ex_id": "aqua-test-9",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.078125,
"scores": {
"A": -11.265625,
"B": -8.890625,
"C": -9.96875,
"D": -12.359375,
"E": -13.9921875
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.01953125,
"scores": {
"A": -7.32421875,
"B": -11.34375,
"C": -11.5,
"D": -13.6875,
"E": -15.28125
}
},
"flip": true
},
{
"ex_id": "aqua-test-15",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.671875,
"scores": {
"A": -11.078125,
"B": -10.40625,
"C": -13.625,
"D": -15.3125,
"E": -13.8125
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.7421875,
"scores": {
"A": -6.1171875,
"B": -10.859375,
"C": -11.296875,
"D": -11.0625,
"E": -13.578125
}
},
"flip": true
},
{
"ex_id": "aqua-test-16",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 2.80078125,
"scores": {
"A": -12.484375,
"B": -10.515625,
"C": -7.71484375,
"D": -12.859375,
"E": -12.8125
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.87109375,
"scores": {
"A": -7.82421875,
"B": -9.453125,
"C": -8.6953125,
"D": -9.59375,
"E": -11.6953125
}
},
"flip": true
},
{
"ex_id": "aqua-test-21",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.765625,
"scores": {
"A": -10.4140625,
"B": -9.6484375,
"C": -12.5546875,
"D": -12.234375,
"E": -11.3828125
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.5390625,
"scores": {
"A": -6.1328125,
"B": -9.671875,
"C": -10.8046875,
"D": -10.671875,
"E": -11.2109375
}
},
"flip": true
},
{
"ex_id": "aqua-test-25",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.0546875,
"scores": {
"A": -12.953125,
"B": -12.2578125,
"C": -12.203125,
"D": -12.4140625,
"E": -13.6328125
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.5625,
"scores": {
"A": -8.6484375,
"B": -10.8828125,
"C": -10.2109375,
"D": -9.4609375,
"E": -10.859375
}
},
"flip": true
},
{
"ex_id": "aqua-test-33",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 1.1875,
"scores": {
"A": -17.28125,
"B": -18.1875,
"C": -16.09375,
"D": -19.15625,
"E": -19.46875
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.4375,
"scores": {
"A": -9.125,
"B": -10.1171875,
"C": -9.5625,
"D": -10.0703125,
"E": -10.4921875
}
},
"flip": true
},
{
"ex_id": "aqua-test-39",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 1.71875,
"scores": {
"A": -10.2265625,
"B": -11.9453125,
"C": -12.1484375,
"D": -14.3125,
"E": -14.015625
}
},
"ablated": {
"pred_label": "C",
"correct": false,
"margin": -0.390625,
"scores": {
"A": -10.234375,
"B": -10.1875,
"C": -9.84375,
"D": -11.59375,
"E": -10.8515625
}
},
"flip": true
},
{
"ex_id": "aqua-test-47",
"gold": "E",
"baseline": {
"pred_label": "E",
"correct": true,
"margin": 0.203125,
"scores": {
"A": -11.9453125,
"B": -12.5,
"C": -12.1171875,
"D": -13.046875,
"E": -11.7421875
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.0390625,
"scores": {
"A": -9.3828125,
"B": -11.8515625,
"C": -13.359375,
"D": -12.15625,
"E": -13.421875
}
},
"flip": true
},
{
"ex_id": "aqua-test-52",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.0625,
"scores": {
"A": -12.890625,
"B": -9.8515625,
"C": -9.9140625,
"D": -11.515625,
"E": -10.6875
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.4765625,
"scores": {
"A": -5.1328125,
"B": -5.609375,
"C": -6.609375,
"D": -6.8984375,
"E": -6.4296875
}
},
"flip": true
},
{
"ex_id": "aqua-test-57",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.15625,
"scores": {
"A": -13.875,
"B": -12.96875,
"C": -14.359375,
"D": -14.140625,
"E": -13.125
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.640625,
"scores": {
"A": -10.3125,
"B": -12.953125,
"C": -12.8203125,
"D": -12.8359375,
"E": -12.5078125
}
},
"flip": true
},
{
"ex_id": "aqua-test-68",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.6875,
"scores": {
"A": -11.65625,
"B": -10.96875,
"C": -11.875,
"D": -12.078125,
"E": -12.640625
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.1953125,
"scores": {
"A": -9.03125,
"B": -11.2265625,
"C": -11.265625,
"D": -11.5234375,
"E": -10.6171875
}
},
"flip": true
},
{
"ex_id": "aqua-test-78",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 3.078125,
"scores": {
"A": -12.7890625,
"B": -8.3203125,
"C": -11.3984375,
"D": -13.765625,
"E": -13.84375
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.63671875,
"scores": {
"A": -5.62890625,
"B": -10.265625,
"C": -11.3125,
"D": -12.078125,
"E": -12.4296875
}
},
"flip": true
},
{
"ex_id": "aqua-test-87",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.625,
"scores": {
"A": -9.7890625,
"B": -9.1640625,
"C": -11.234375,
"D": -12.0,
"E": -11.46875
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.90625,
"scores": {
"A": -6.578125,
"B": -9.484375,
"C": -9.703125,
"D": -9.1875,
"E": -11.25
}
},
"flip": true
},
{
"ex_id": "aqua-test-100",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.1484375,
"scores": {
"A": -9.265625,
"B": -9.7265625,
"C": -9.1171875,
"D": -10.0546875,
"E": -10.6015625
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.16015625,
"scores": {
"A": -4.76171875,
"B": -9.390625,
"C": -10.921875,
"D": -11.46875,
"E": -13.1875
}
},
"flip": true
},
{
"ex_id": "aqua-test-103",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.1484375,
"scores": {
"A": -9.734375,
"B": -8.5234375,
"C": -9.6875,
"D": -11.4375,
"E": -9.671875
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.75,
"scores": {
"A": -7.84375,
"B": -8.59375,
"C": -10.53125,
"D": -9.78125,
"E": -8.421875
}
},
"flip": true
},
{
"ex_id": "aqua-test-105",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 3.03125,
"scores": {
"A": -11.5,
"B": -12.0234375,
"C": -8.46875,
"D": -13.9765625,
"E": -13.28125
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.12109375,
"scores": {
"A": -7.45703125,
"B": -9.78125,
"C": -8.578125,
"D": -13.375,
"E": -13.59375
}
},
"flip": true
},
{
"ex_id": "aqua-test-111",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.203125,
"scores": {
"A": -9.796875,
"B": -9.2734375,
"C": -9.4765625,
"D": -10.7578125,
"E": -11.4296875
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.6328125,
"scores": {
"A": -8.4140625,
"B": -12.046875,
"C": -12.484375,
"D": -13.3125,
"E": -14.53125
}
},
"flip": true
},
{
"ex_id": "aqua-test-116",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.7109375,
"scores": {
"A": -12.0390625,
"B": -9.7421875,
"C": -11.453125,
"D": -11.5390625,
"E": -11.8203125
}
},
"ablated": {
"pred_label": "D",
"correct": false,
"margin": -0.51171875,
"scores": {
"A": -7.28125,
"B": -7.4921875,
"C": -8.8203125,
"D": -6.98046875,
"E": -7.3046875
}
},
"flip": true
},
{
"ex_id": "aqua-test-120",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.3515625,
"scores": {
"A": -12.625,
"B": -10.171875,
"C": -10.5234375,
"D": -11.96875,
"E": -12.625
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.84375,
"scores": {
"A": -7.4375,
"B": -12.28125,
"C": -11.75,
"D": -11.984375,
"E": -13.75
}
},
"flip": true
},
{
"ex_id": "aqua-test-122",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.671875,
"scores": {
"A": -11.09375,
"B": -10.421875,
"C": -13.25,
"D": -13.296875,
"E": -13.5
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.328125,
"scores": {
"A": -6.90625,
"B": -9.234375,
"C": -12.421875,
"D": -11.0,
"E": -12.1796875
}
},
"flip": true
},
{
"ex_id": "aqua-test-123",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 1.9296875,
"scores": {
"A": -12.8125,
"B": -13.265625,
"C": -10.09375,
"D": -12.0234375,
"E": -12.84375
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.0625,
"scores": {
"A": -7.71875,
"B": -10.9296875,
"C": -11.78125,
"D": -11.5546875,
"E": -13.9375
}
},
"flip": true
},
{
"ex_id": "aqua-test-125",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.25,
"scores": {
"A": -12.8984375,
"B": -12.015625,
"C": -10.3671875,
"D": -10.6171875,
"E": -11.515625
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.16796875,
"scores": {
"A": -6.90234375,
"B": -8.6875,
"C": -10.0703125,
"D": -9.84375,
"E": -10.1640625
}
},
"flip": true
},
{
"ex_id": "aqua-test-130",
"gold": "D",
"baseline": {
"pred_label": "D",
"correct": true,
"margin": 0.5625,
"scores": {
"A": -12.53125,
"B": -11.625,
"C": -14.40625,
"D": -11.0625,
"E": -12.203125
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.62890625,
"scores": {
"A": -7.70703125,
"B": -7.73046875,
"C": -10.4296875,
"D": -8.3359375,
"E": -8.21875
}
},
"flip": true
},
{
"ex_id": "aqua-test-140",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.09375,
"scores": {
"A": -12.171875,
"B": -10.953125,
"C": -12.484375,
"D": -12.046875,
"E": -12.828125
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.09375,
"scores": {
"A": -8.640625,
"B": -9.734375,
"C": -10.828125,
"D": -10.234375,
"E": -11.625
}
},
"flip": true
},
{
"ex_id": "aqua-test-141",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.390625,
"scores": {
"A": -15.65625,
"B": -14.0,
"C": -12.3359375,
"D": -12.7265625,
"E": -13.421875
}
},
"ablated": {
"pred_label": "E",
"correct": false,
"margin": -1.5078125,
"scores": {
"A": -9.03125,
"B": -10.1640625,
"C": -9.59375,
"D": -8.109375,
"E": -8.0859375
}
},
"flip": true
},
{
"ex_id": "aqua-test-148",
"gold": "D",
"baseline": {
"pred_label": "D",
"correct": true,
"margin": 0.09375,
"scores": {
"A": -11.84375,
"B": -8.6875,
"C": -9.390625,
"D": -8.59375,
"E": -10.328125
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -0.2890625,
"scores": {
"A": -8.21875,
"B": -9.21875,
"C": -9.4140625,
"D": -8.5078125,
"E": -9.5859375
}
},
"flip": true
},
{
"ex_id": "aqua-test-152",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.65625,
"scores": {
"A": -12.15625,
"B": -11.09375,
"C": -11.75,
"D": -11.765625,
"E": -11.75
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.6015625,
"scores": {
"A": -8.2734375,
"B": -12.875,
"C": -16.15625,
"D": -13.109375,
"E": -13.90625
}
},
"flip": true
},
{
"ex_id": "aqua-test-167",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 2.28125,
"scores": {
"A": -13.3125,
"B": -10.640625,
"C": -12.921875,
"D": -16.09375,
"E": -14.75
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.328125,
"scores": {
"A": -8.015625,
"B": -11.34375,
"C": -15.125,
"D": -13.3125,
"E": -14.375
}
},
"flip": true
},
{
"ex_id": "aqua-test-178",
"gold": "E",
"baseline": {
"pred_label": "E",
"correct": true,
"margin": 0.4609375,
"scores": {
"A": -13.1953125,
"B": -11.1015625,
"C": -12.8203125,
"D": -12.625,
"E": -10.640625
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.72265625,
"scores": {
"A": -5.77734375,
"B": -10.375,
"C": -8.5,
"D": -10.84375,
"E": -12.5
}
},
"flip": true
},
{
"ex_id": "aqua-test-181",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.7734375,
"scores": {
"A": -9.6328125,
"B": -8.859375,
"C": -11.828125,
"D": -11.640625,
"E": -10.6875
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.5078125,
"scores": {
"A": -7.5546875,
"B": -9.0625,
"C": -10.4453125,
"D": -9.140625,
"E": -9.0078125
}
},
"flip": true
},
{
"ex_id": "aqua-test-183",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.40625,
"scores": {
"A": -12.6484375,
"B": -9.3671875,
"C": -10.7734375,
"D": -13.140625,
"E": -13.125
}
},
"ablated": {
"pred_label": "C",
"correct": false,
"margin": -0.71875,
"scores": {
"A": -8.6640625,
"B": -7.5078125,
"C": -6.7890625,
"D": -9.3046875,
"E": -10.7578125
}
},
"flip": true
},
{
"ex_id": "aqua-test-189",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.2109375,
"scores": {
"A": -14.5390625,
"B": -11.546875,
"C": -11.8046875,
"D": -11.7578125,
"E": -13.34375
}
},
"ablated": {
"pred_label": "D",
"correct": false,
"margin": -0.84375,
"scores": {
"A": -9.875,
"B": -10.3828125,
"C": -10.28125,
"D": -9.5390625,
"E": -9.5625
}
},
"flip": true
},
{
"ex_id": "aqua-test-190",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.34375,
"scores": {
"A": -13.6015625,
"B": -10.7734375,
"C": -10.4296875,
"D": -13.3671875,
"E": -13.9296875
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -6.5,
"scores": {
"A": -6.3515625,
"B": -9.2578125,
"C": -12.8515625,
"D": -10.84375,
"E": -12.953125
}
},
"flip": true
},
{
"ex_id": "aqua-test-191",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.953125,
"scores": {
"A": -11.890625,
"B": -10.9375,
"C": -13.640625,
"D": -14.109375,
"E": -13.765625
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.09375,
"scores": {
"A": -6.4375,
"B": -8.53125,
"C": -11.7734375,
"D": -11.28125,
"E": -12.3203125
}
},
"flip": true
},
{
"ex_id": "aqua-test-206",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.1796875,
"scores": {
"A": -11.6171875,
"B": -10.96875,
"C": -11.1484375,
"D": -12.84375,
"E": -14.0
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.03125,
"scores": {
"A": -8.0,
"B": -11.03125,
"C": -12.328125,
"D": -11.765625,
"E": -14.171875
}
},
"flip": true
},
{
"ex_id": "aqua-test-212",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.5078125,
"scores": {
"A": -11.59375,
"B": -9.609375,
"C": -11.1171875,
"D": -11.7421875,
"E": -12.8359375
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -3.390625,
"scores": {
"A": -4.984375,
"B": -8.375,
"C": -11.28125,
"D": -9.4140625,
"E": -11.890625
}
},
"flip": true
},
{
"ex_id": "aqua-test-223",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 3.09375,
"scores": {
"A": -10.265625,
"B": -7.1484375,
"C": -10.2421875,
"D": -10.921875,
"E": -11.3359375
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -4.09765625,
"scores": {
"A": -4.67578125,
"B": -8.7734375,
"C": -12.375,
"D": -11.1796875,
"E": -13.109375
}
},
"flip": true
},
{
"ex_id": "aqua-test-228",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.078125,
"scores": {
"A": -13.3125,
"B": -10.71875,
"C": -10.796875,
"D": -14.0859375,
"E": -14.8984375
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -1.28125,
"scores": {
"A": -7.171875,
"B": -8.453125,
"C": -9.515625,
"D": -10.125,
"E": -8.953125
}
},
"flip": true
},
{
"ex_id": "aqua-test-249",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.828125,
"scores": {
"A": -10.328125,
"B": -8.859375,
"C": -8.03125,
"D": -11.640625,
"E": -11.109375
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.59375,
"scores": {
"A": -5.9921875,
"B": -7.609375,
"C": -8.5859375,
"D": -8.4375,
"E": -8.6796875
}
},
"flip": true
},
{
"ex_id": "aqua-test-251",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.078125,
"scores": {
"A": -9.90625,
"B": -9.828125,
"C": -12.40625,
"D": -11.265625,
"E": -10.703125
}
},
"ablated": {
"pred_label": "A",
"correct": false,
"margin": -2.984375,
"scores": {
"A": -9.296875,
"B": -12.28125,
"C": -13.5,
"D": -12.09375,
"E": -11.140625
}
},
"flip": true
}
]
},
"donors_meta": [
{
"n_donor_bank": 192,
"donor_source": "cross_task_eval",
"donor_tasks": [
"gsm8k",
"commonsenseqa",
"strategyqa"
],
"donor_n_eval": 64,
"donor_pick": "cyclic",
"donor_require_gold_in_candidates": false,
"donor_require_baseline_correct": false
}
],
"transfer_patching_summary_on_flipset": {
"patched_transfer": {
"n": 42,
"rescued": 33,
"rescued_pct": 78.57142857142857,
"mean_delta_margin_vs_ablated": 3.4120163917541504,
"median_delta_margin_vs_ablated": 3.212890625
},
"patched_self": {
"n": 42,
"rescued": 31,
"rescued_pct": 73.80952380952381,
"mean_delta_margin_vs_ablated": 3.3116629123687744,
"median_delta_margin_vs_ablated": 3.0859375
}
},
"transfer_patching_rows": [
{
"ex_id": "aqua-test-2",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.2578125,
"scores": {
"A": -11.234375,
"B": -10.2109375,
"C": -13.171875,
"D": -12.4453125,
"E": -10.46875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -7.953125,
"scores": {
"A": -6.0625,
"B": -14.015625,
"C": -17.125,
"D": -15.2734375,
"E": -15.640625
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 1.7109375,
"scores": {
"A": -9.8203125,
"B": -7.5546875,
"C": -9.265625,
"D": -10.609375,
"E": -10.28125
}
},
"transfer_donor_ex_id": "strategyqa-test-19",
"transfer_donor_gold": "NO",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 1.3828125,
"scores": {
"A": -9.859375,
"B": -8.4765625,
"C": -10.296875,
"D": -10.875,
"E": -10.703125
}
}
},
{
"ex_id": "aqua-test-5",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.953125,
"scores": {
"A": -11.9921875,
"B": -10.9765625,
"C": -12.0390625,
"D": -11.9609375,
"E": -11.9296875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.1640625,
"scores": {
"A": -7.59375,
"B": -9.7578125,
"C": -11.0234375,
"D": -9.1953125,
"E": -10.0625
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 0.1796875,
"scores": {
"A": -8.765625,
"B": -7.5,
"C": -7.6796875,
"D": -8.9609375,
"E": -9.078125
}
},
"transfer_donor_ex_id": "strategyqa-test-12",
"transfer_donor_gold": "YES",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 0.15625,
"scores": {
"A": -9.109375,
"B": -8.265625,
"C": -8.421875,
"D": -9.328125,
"E": -9.609375
}
}
},
{
"ex_id": "aqua-test-9",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.078125,
"scores": {
"A": -11.265625,
"B": -8.890625,
"C": -9.96875,
"D": -12.359375,
"E": -13.9921875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.01953125,
"scores": {
"A": -7.32421875,
"B": -11.34375,
"C": -11.5,
"D": -13.6875,
"E": -15.28125
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 0.9921875,
"scores": {
"A": -10.1015625,
"B": -6.890625,
"C": -7.8828125,
"D": -10.5546875,
"E": -11.984375
}
},
"transfer_donor_ex_id": "strategyqa-test-39",
"transfer_donor_gold": "YES",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 1.109375,
"scores": {
"A": -10.0625,
"B": -7.453125,
"C": -8.5625,
"D": -10.875,
"E": -12.3671875
}
}
},
{
"ex_id": "aqua-test-15",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.671875,
"scores": {
"A": -11.078125,
"B": -10.40625,
"C": -13.625,
"D": -15.3125,
"E": -13.8125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.7421875,
"scores": {
"A": -6.1171875,
"B": -10.859375,
"C": -11.296875,
"D": -11.0625,
"E": -13.578125
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 1.24609375,
"scores": {
"A": -8.53125,
"B": -7.28515625,
"C": -9.3984375,
"D": -11.59375,
"E": -11.796875
}
},
"transfer_donor_ex_id": "strategyqa-test-18",
"transfer_donor_gold": "YES",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 0.7578125,
"scores": {
"A": -9.328125,
"B": -8.5703125,
"C": -10.296875,
"D": -11.6328125,
"E": -11.7734375
}
}
},
{
"ex_id": "aqua-test-16",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 2.80078125,
"scores": {
"A": -12.484375,
"B": -10.515625,
"C": -7.71484375,
"D": -12.859375,
"E": -12.8125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -0.87109375,
"scores": {
"A": -7.82421875,
"B": -9.453125,
"C": -8.6953125,
"D": -9.59375,
"E": -11.6953125
}
},
"patched_transfer": {
"pred_label": "C",
"correct": true,
"margin": 1.41796875,
"scores": {
"A": -10.203125,
"B": -9.015625,
"C": -7.59765625,
"D": -9.90625,
"E": -11.15625
}
},
"transfer_donor_ex_id": "commonsenseqa-validation-11",
"transfer_donor_gold": "E",
"patched_self": {
"pred_label": "C",
"correct": true,
"margin": 1.3984375,
"scores": {
"A": -10.015625,
"B": -9.453125,
"C": -8.0546875,
"D": -10.046875,
"E": -11.390625
}
}
},
{
"ex_id": "aqua-test-21",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.765625,
"scores": {
"A": -10.4140625,
"B": -9.6484375,
"C": -12.5546875,
"D": -12.234375,
"E": -11.3828125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.5390625,
"scores": {
"A": -6.1328125,
"B": -9.671875,
"C": -10.8046875,
"D": -10.671875,
"E": -11.2109375
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 1.73046875,
"scores": {
"A": -10.0,
"B": -7.85546875,
"C": -9.5859375,
"D": -10.15625,
"E": -10.53125
}
},
"transfer_donor_ex_id": "strategyqa-test-47",
"transfer_donor_gold": "NO",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 1.625,
"scores": {
"A": -10.0078125,
"B": -8.3828125,
"C": -10.2265625,
"D": -10.3984375,
"E": -11.0625
}
}
},
{
"ex_id": "aqua-test-25",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.0546875,
"scores": {
"A": -12.953125,
"B": -12.2578125,
"C": -12.203125,
"D": -12.4140625,
"E": -13.6328125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.5625,
"scores": {
"A": -8.6484375,
"B": -10.8828125,
"C": -10.2109375,
"D": -9.4609375,
"E": -10.859375
}
},
"patched_transfer": {
"pred_label": "B",
"correct": false,
"margin": -0.203125,
"scores": {
"A": -9.4140625,
"B": -7.7890625,
"C": -7.9921875,
"D": -9.34375,
"E": -10.2578125
}
},
"transfer_donor_ex_id": "strategyqa-test-26",
"transfer_donor_gold": "NO",
"patched_self": {
"pred_label": "B",
"correct": false,
"margin": -0.171875,
"scores": {
"A": -9.7890625,
"B": -8.2734375,
"C": -8.4453125,
"D": -9.390625,
"E": -10.4609375
}
}
},
{
"ex_id": "aqua-test-33",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 1.1875,
"scores": {
"A": -17.28125,
"B": -18.1875,
"C": -16.09375,
"D": -19.15625,
"E": -19.46875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -0.4375,
"scores": {
"A": -9.125,
"B": -10.1171875,
"C": -9.5625,
"D": -10.0703125,
"E": -10.4921875
}
},
"patched_transfer": {
"pred_label": "C",
"correct": true,
"margin": 0.8828125,
"scores": {
"A": -10.7890625,
"B": -8.8828125,
"C": -8.0,
"D": -9.9453125,
"E": -10.921875
}
},
"transfer_donor_ex_id": "strategyqa-test-30",
"transfer_donor_gold": "YES",
"patched_self": {
"pred_label": "C",
"correct": true,
"margin": 0.9375,
"scores": {
"A": -10.890625,
"B": -9.453125,
"C": -8.515625,
"D": -10.0625,
"E": -11.015625
}
}
},
{
"ex_id": "aqua-test-39",
"gold": "A",
"baseline": {
"pred_label": "A",
"correct": true,
"margin": 1.71875,
"scores": {
"A": -10.2265625,
"B": -11.9453125,
"C": -12.1484375,
"D": -14.3125,
"E": -14.015625
}
},
"ablated_1": {
"pred_label": "C",
"correct": false,
"margin": -0.390625,
"scores": {
"A": -10.234375,
"B": -10.1875,
"C": -9.84375,
"D": -11.59375,
"E": -10.8515625
}
},
"patched_transfer": {
"pred_label": "C",
"correct": false,
"margin": -0.0078125,
"scores": {
"A": -9.1953125,
"B": -9.7578125,
"C": -9.1875,
"D": -10.234375,
"E": -9.25
}
},
"transfer_donor_ex_id": "gsm8k-test-60",
"transfer_donor_gold": "100",
"patched_self": {
"pred_label": "C",
"correct": false,
"margin": -0.265625,
"scores": {
"A": -9.4375,
"B": -9.75,
"C": -9.171875,
"D": -10.28125,
"E": -9.4609375
}
}
},
{
"ex_id": "aqua-test-47",
"gold": "E",
"baseline": {
"pred_label": "E",
"correct": true,
"margin": 0.203125,
"scores": {
"A": -11.9453125,
"B": -12.5,
"C": -12.1171875,
"D": -13.046875,
"E": -11.7421875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.0390625,
"scores": {
"A": -9.3828125,
"B": -11.8515625,
"C": -13.359375,
"D": -12.15625,
"E": -13.421875
}
},
"patched_transfer": {
"pred_label": "C",
"correct": false,
"margin": -1.03125,
"scores": {
"A": -10.859375,
"B": -11.03125,
"C": -9.9375,
"D": -11.546875,
"E": -10.96875
}
},
"transfer_donor_ex_id": "gsm8k-test-10",
"transfer_donor_gold": "2",
"patched_self": {
"pred_label": "C",
"correct": false,
"margin": -0.8125,
"scores": {
"A": -12.0,
"B": -12.4375,
"C": -11.296875,
"D": -12.40625,
"E": -12.109375
}
}
},
{
"ex_id": "aqua-test-52",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.0625,
"scores": {
"A": -12.890625,
"B": -9.8515625,
"C": -9.9140625,
"D": -11.515625,
"E": -10.6875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -0.4765625,
"scores": {
"A": -5.1328125,
"B": -5.609375,
"C": -6.609375,
"D": -6.8984375,
"E": -6.4296875
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 0.078125,
"scores": {
"A": -10.609375,
"B": -6.8203125,
"C": -6.8984375,
"D": -9.625,
"E": -9.734375
}
},
"transfer_donor_ex_id": "commonsenseqa-validation-60",
"transfer_donor_gold": "C",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 0.21875,
"scores": {
"A": -10.234375,
"B": -6.859375,
"C": -7.078125,
"D": -9.6015625,
"E": -9.796875
}
}
},
{
"ex_id": "aqua-test-57",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.15625,
"scores": {
"A": -13.875,
"B": -12.96875,
"C": -14.359375,
"D": -14.140625,
"E": -13.125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.640625,
"scores": {
"A": -10.3125,
"B": -12.953125,
"C": -12.8203125,
"D": -12.8359375,
"E": -12.5078125
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 0.5546875,
"scores": {
"A": -12.40625,
"B": -11.78125,
"C": -12.84375,
"D": -13.421875,
"E": -12.3359375
}
},
"transfer_donor_ex_id": "gsm8k-test-25",
"transfer_donor_gold": "6",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 0.3515625,
"scores": {
"A": -12.84375,
"B": -12.4296875,
"C": -13.4375,
"D": -13.765625,
"E": -12.78125
}
}
},
{
"ex_id": "aqua-test-68",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.6875,
"scores": {
"A": -11.65625,
"B": -10.96875,
"C": -11.875,
"D": -12.078125,
"E": -12.640625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.1953125,
"scores": {
"A": -9.03125,
"B": -11.2265625,
"C": -11.265625,
"D": -11.5234375,
"E": -10.6171875
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 0.265625,
"scores": {
"A": -10.140625,
"B": -8.859375,
"C": -9.125,
"D": -10.7578125,
"E": -10.1015625
}
},
"transfer_donor_ex_id": "strategyqa-test-5",
"transfer_donor_gold": "YES",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 0.046875,
"scores": {
"A": -10.1796875,
"B": -9.7734375,
"C": -9.8203125,
"D": -11.078125,
"E": -10.5625
}
}
},
{
"ex_id": "aqua-test-78",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 3.078125,
"scores": {
"A": -12.7890625,
"B": -8.3203125,
"C": -11.3984375,
"D": -13.765625,
"E": -13.84375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.63671875,
"scores": {
"A": -5.62890625,
"B": -10.265625,
"C": -11.3125,
"D": -12.078125,
"E": -12.4296875
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 1.9921875,
"scores": {
"A": -10.28125,
"B": -7.109375,
"C": -9.1015625,
"D": -11.7890625,
"E": -11.53125
}
},
"transfer_donor_ex_id": "gsm8k-test-32",
"transfer_donor_gold": "11232",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 1.671875,
"scores": {
"A": -10.6796875,
"B": -7.3671875,
"C": -9.0390625,
"D": -11.8359375,
"E": -11.515625
}
}
},
{
"ex_id": "aqua-test-87",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.625,
"scores": {
"A": -9.7890625,
"B": -9.1640625,
"C": -11.234375,
"D": -12.0,
"E": -11.46875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.90625,
"scores": {
"A": -6.578125,
"B": -9.484375,
"C": -9.703125,
"D": -9.1875,
"E": -11.25
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 1.578125,
"scores": {
"A": -9.5234375,
"B": -7.9453125,
"C": -10.046875,
"D": -10.640625,
"E": -10.7578125
}
},
"transfer_donor_ex_id": "commonsenseqa-validation-13",
"transfer_donor_gold": "A",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 1.4296875,
"scores": {
"A": -9.921875,
"B": -8.4921875,
"C": -10.4140625,
"D": -10.9453125,
"E": -11.1328125
}
}
},
{
"ex_id": "aqua-test-100",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.1484375,
"scores": {
"A": -9.265625,
"B": -9.7265625,
"C": -9.1171875,
"D": -10.0546875,
"E": -10.6015625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.16015625,
"scores": {
"A": -4.76171875,
"B": -9.390625,
"C": -10.921875,
"D": -11.46875,
"E": -13.1875
}
},
"patched_transfer": {
"pred_label": "C",
"correct": true,
"margin": 0.5546875,
"scores": {
"A": -7.515625,
"B": -7.2734375,
"C": -6.71875,
"D": -8.953125,
"E": -9.90625
}
},
"transfer_donor_ex_id": "strategyqa-test-52",
"transfer_donor_gold": "NO",
"patched_self": {
"pred_label": "C",
"correct": true,
"margin": 0.421875,
"scores": {
"A": -8.2421875,
"B": -8.515625,
"C": -7.8203125,
"D": -9.5078125,
"E": -10.5859375
}
}
},
{
"ex_id": "aqua-test-103",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.1484375,
"scores": {
"A": -9.734375,
"B": -8.5234375,
"C": -9.6875,
"D": -11.4375,
"E": -9.671875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -0.75,
"scores": {
"A": -7.84375,
"B": -8.59375,
"C": -10.53125,
"D": -9.78125,
"E": -8.421875
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 1.296875,
"scores": {
"A": -8.8125,
"B": -7.390625,
"C": -8.859375,
"D": -10.28125,
"E": -8.6875
}
},
"transfer_donor_ex_id": "gsm8k-test-22",
"transfer_donor_gold": "18",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 1.27734375,
"scores": {
"A": -8.953125,
"B": -7.44140625,
"C": -8.75,
"D": -10.265625,
"E": -8.71875
}
}
},
{
"ex_id": "aqua-test-105",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 3.03125,
"scores": {
"A": -11.5,
"B": -12.0234375,
"C": -8.46875,
"D": -13.9765625,
"E": -13.28125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.12109375,
"scores": {
"A": -7.45703125,
"B": -9.78125,
"C": -8.578125,
"D": -13.375,
"E": -13.59375
}
},
"patched_transfer": {
"pred_label": "C",
"correct": true,
"margin": 2.23046875,
"scores": {
"A": -10.125,
"B": -9.671875,
"C": -7.44140625,
"D": -12.0625,
"E": -12.125
}
},
"transfer_donor_ex_id": "gsm8k-test-9",
"transfer_donor_gold": "2",
"patched_self": {
"pred_label": "C",
"correct": true,
"margin": 2.203125,
"scores": {
"A": -10.34375,
"B": -10.25,
"C": -8.046875,
"D": -12.796875,
"E": -12.75
}
}
},
{
"ex_id": "aqua-test-111",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.203125,
"scores": {
"A": -9.796875,
"B": -9.2734375,
"C": -9.4765625,
"D": -10.7578125,
"E": -11.4296875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.6328125,
"scores": {
"A": -8.4140625,
"B": -12.046875,
"C": -12.484375,
"D": -13.3125,
"E": -14.53125
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 0.171875,
"scores": {
"A": -8.796875,
"B": -7.1796875,
"C": -7.3515625,
"D": -9.28125,
"E": -8.6640625
}
},
"transfer_donor_ex_id": "strategyqa-test-8",
"transfer_donor_gold": "YES",
"patched_self": {
"pred_label": "C",
"correct": false,
"margin": -0.0234375,
"scores": {
"A": -9.125,
"B": -8.1484375,
"C": -8.125,
"D": -9.4453125,
"E": -9.2578125
}
}
},
{
"ex_id": "aqua-test-116",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.7109375,
"scores": {
"A": -12.0390625,
"B": -9.7421875,
"C": -11.453125,
"D": -11.5390625,
"E": -11.8203125
}
},
"ablated_1": {
"pred_label": "D",
"correct": false,
"margin": -0.51171875,
"scores": {
"A": -7.28125,
"B": -7.4921875,
"C": -8.8203125,
"D": -6.98046875,
"E": -7.3046875
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 1.71484375,
"scores": {
"A": -8.359375,
"B": -6.64453125,
"C": -8.578125,
"D": -8.796875,
"E": -9.3125
}
},
"transfer_donor_ex_id": "commonsenseqa-validation-61",
"transfer_donor_gold": "A",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 1.375,
"scores": {
"A": -8.359375,
"B": -6.984375,
"C": -8.78125,
"D": -8.8671875,
"E": -9.4453125
}
}
},
{
"ex_id": "aqua-test-120",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.3515625,
"scores": {
"A": -12.625,
"B": -10.171875,
"C": -10.5234375,
"D": -11.96875,
"E": -12.625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.84375,
"scores": {
"A": -7.4375,
"B": -12.28125,
"C": -11.75,
"D": -11.984375,
"E": -13.75
}
},
"patched_transfer": {
"pred_label": "C",
"correct": false,
"margin": -0.234375,
"scores": {
"A": -12.015625,
"B": -8.328125,
"C": -8.09375,
"D": -10.21875,
"E": -10.296875
}
},
"transfer_donor_ex_id": "strategyqa-test-7",
"transfer_donor_gold": "NO",
"patched_self": {
"pred_label": "C",
"correct": false,
"margin": -0.5,
"scores": {
"A": -12.2734375,
"B": -9.34375,
"C": -8.84375,
"D": -10.6796875,
"E": -10.75
}
}
},
{
"ex_id": "aqua-test-122",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.671875,
"scores": {
"A": -11.09375,
"B": -10.421875,
"C": -13.25,
"D": -13.296875,
"E": -13.5
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.328125,
"scores": {
"A": -6.90625,
"B": -9.234375,
"C": -12.421875,
"D": -11.0,
"E": -12.1796875
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 0.4375,
"scores": {
"A": -9.5546875,
"B": -9.1171875,
"C": -11.0,
"D": -11.3125,
"E": -12.390625
}
},
"transfer_donor_ex_id": "gsm8k-test-21",
"transfer_donor_gold": "3",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 0.328125,
"scores": {
"A": -10.2265625,
"B": -9.8984375,
"C": -12.0234375,
"D": -12.0859375,
"E": -13.40625
}
}
},
{
"ex_id": "aqua-test-123",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 1.9296875,
"scores": {
"A": -12.8125,
"B": -13.265625,
"C": -10.09375,
"D": -12.0234375,
"E": -12.84375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.0625,
"scores": {
"A": -7.71875,
"B": -10.9296875,
"C": -11.78125,
"D": -11.5546875,
"E": -13.9375
}
},
"patched_transfer": {
"pred_label": "C",
"correct": true,
"margin": 0.1953125,
"scores": {
"A": -11.609375,
"B": -11.1875,
"C": -10.9921875,
"D": -11.9765625,
"E": -12.8125
}
},
"transfer_donor_ex_id": "commonsenseqa-validation-12",
"transfer_donor_gold": "E",
"patched_self": {
"pred_label": "C",
"correct": true,
"margin": 0.1171875,
"scores": {
"A": -11.625,
"B": -11.296875,
"C": -11.1796875,
"D": -12.0078125,
"E": -12.8671875
}
}
},
{
"ex_id": "aqua-test-125",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.25,
"scores": {
"A": -12.8984375,
"B": -12.015625,
"C": -10.3671875,
"D": -10.6171875,
"E": -11.515625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.16796875,
"scores": {
"A": -6.90234375,
"B": -8.6875,
"C": -10.0703125,
"D": -9.84375,
"E": -10.1640625
}
},
"patched_transfer": {
"pred_label": "C",
"correct": true,
"margin": 0.31640625,
"scores": {
"A": -9.65625,
"B": -8.28125,
"C": -7.96484375,
"D": -9.09375,
"E": -9.828125
}
},
"transfer_donor_ex_id": "strategyqa-test-17",
"transfer_donor_gold": "YES",
"patched_self": {
"pred_label": "B",
"correct": false,
"margin": -0.2421875,
"scores": {
"A": -9.578125,
"B": -8.921875,
"C": -9.1640625,
"D": -9.5703125,
"E": -10.484375
}
}
},
{
"ex_id": "aqua-test-130",
"gold": "D",
"baseline": {
"pred_label": "D",
"correct": true,
"margin": 0.5625,
"scores": {
"A": -12.53125,
"B": -11.625,
"C": -14.40625,
"D": -11.0625,
"E": -12.203125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -0.62890625,
"scores": {
"A": -7.70703125,
"B": -7.73046875,
"C": -10.4296875,
"D": -8.3359375,
"E": -8.21875
}
},
"patched_transfer": {
"pred_label": "B",
"correct": false,
"margin": -1.234375,
"scores": {
"A": -10.640625,
"B": -8.5390625,
"C": -9.2578125,
"D": -9.7734375,
"E": -9.6796875
}
},
"transfer_donor_ex_id": "strategyqa-test-2",
"transfer_donor_gold": "NO",
"patched_self": {
"pred_label": "B",
"correct": false,
"margin": -0.875,
"scores": {
"A": -10.1953125,
"B": -8.4921875,
"C": -9.40625,
"D": -9.3671875,
"E": -9.40625
}
}
},
{
"ex_id": "aqua-test-140",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.09375,
"scores": {
"A": -12.171875,
"B": -10.953125,
"C": -12.484375,
"D": -12.046875,
"E": -12.828125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.09375,
"scores": {
"A": -8.640625,
"B": -9.734375,
"C": -10.828125,
"D": -10.234375,
"E": -11.625
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 1.296875,
"scores": {
"A": -9.4921875,
"B": -8.1953125,
"C": -9.5546875,
"D": -10.21875,
"E": -10.90625
}
},
"transfer_donor_ex_id": "commonsenseqa-validation-30",
"transfer_donor_gold": "E",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 1.1328125,
"scores": {
"A": -9.6015625,
"B": -8.46875,
"C": -9.8515625,
"D": -10.34375,
"E": -11.0546875
}
}
},
{
"ex_id": "aqua-test-141",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.390625,
"scores": {
"A": -15.65625,
"B": -14.0,
"C": -12.3359375,
"D": -12.7265625,
"E": -13.421875
}
},
"ablated_1": {
"pred_label": "E",
"correct": false,
"margin": -1.5078125,
"scores": {
"A": -9.03125,
"B": -10.1640625,
"C": -9.59375,
"D": -8.109375,
"E": -8.0859375
}
},
"patched_transfer": {
"pred_label": "C",
"correct": true,
"margin": 1.4453125,
"scores": {
"A": -13.15625,
"B": -10.59375,
"C": -8.6328125,
"D": -10.078125,
"E": -10.09375
}
},
"transfer_donor_ex_id": "strategyqa-test-3",
"transfer_donor_gold": "NO",
"patched_self": {
"pred_label": "C",
"correct": true,
"margin": 0.8984375,
"scores": {
"A": -12.7734375,
"B": -10.578125,
"C": -8.90625,
"D": -9.8046875,
"E": -9.9921875
}
}
},
{
"ex_id": "aqua-test-148",
"gold": "D",
"baseline": {
"pred_label": "D",
"correct": true,
"margin": 0.09375,
"scores": {
"A": -11.84375,
"B": -8.6875,
"C": -9.390625,
"D": -8.59375,
"E": -10.328125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -0.2890625,
"scores": {
"A": -8.21875,
"B": -9.21875,
"C": -9.4140625,
"D": -8.5078125,
"E": -9.5859375
}
},
"patched_transfer": {
"pred_label": "B",
"correct": false,
"margin": -1.1171875,
"scores": {
"A": -9.265625,
"B": -6.796875,
"C": -7.125,
"D": -7.9140625,
"E": -8.96875
}
},
"transfer_donor_ex_id": "gsm8k-test-58",
"transfer_donor_gold": "4",
"patched_self": {
"pred_label": "B",
"correct": false,
"margin": -1.03125,
"scores": {
"A": -9.2578125,
"B": -6.734375,
"C": -7.015625,
"D": -7.765625,
"E": -8.8828125
}
}
},
{
"ex_id": "aqua-test-152",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.65625,
"scores": {
"A": -12.15625,
"B": -11.09375,
"C": -11.75,
"D": -11.765625,
"E": -11.75
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.6015625,
"scores": {
"A": -8.2734375,
"B": -12.875,
"C": -16.15625,
"D": -13.109375,
"E": -13.90625
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 1.046875,
"scores": {
"A": -9.265625,
"B": -7.828125,
"C": -8.875,
"D": -9.4375,
"E": -9.15625
}
},
"transfer_donor_ex_id": "strategyqa-test-46",
"transfer_donor_gold": "YES",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 0.7734375,
"scores": {
"A": -10.3125,
"B": -9.5,
"C": -10.7109375,
"D": -10.359375,
"E": -10.2734375
}
}
},
{
"ex_id": "aqua-test-167",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 2.28125,
"scores": {
"A": -13.3125,
"B": -10.640625,
"C": -12.921875,
"D": -16.09375,
"E": -14.75
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.328125,
"scores": {
"A": -8.015625,
"B": -11.34375,
"C": -15.125,
"D": -13.3125,
"E": -14.375
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 1.7109375,
"scores": {
"A": -11.7578125,
"B": -9.7890625,
"C": -11.5,
"D": -13.203125,
"E": -13.0703125
}
},
"transfer_donor_ex_id": "commonsenseqa-validation-52",
"transfer_donor_gold": "A",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 1.7265625,
"scores": {
"A": -11.875,
"B": -10.1484375,
"C": -11.890625,
"D": -13.453125,
"E": -13.28125
}
}
},
{
"ex_id": "aqua-test-178",
"gold": "E",
"baseline": {
"pred_label": "E",
"correct": true,
"margin": 0.4609375,
"scores": {
"A": -13.1953125,
"B": -11.1015625,
"C": -12.8203125,
"D": -12.625,
"E": -10.640625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.72265625,
"scores": {
"A": -5.77734375,
"B": -10.375,
"C": -8.5,
"D": -10.84375,
"E": -12.5
}
},
"patched_transfer": {
"pred_label": "B",
"correct": false,
"margin": -3.421875,
"scores": {
"A": -10.203125,
"B": -5.3203125,
"C": -6.4765625,
"D": -9.546875,
"E": -8.7421875
}
},
"transfer_donor_ex_id": "gsm8k-test-8",
"transfer_donor_gold": "25",
"patched_self": {
"pred_label": "B",
"correct": false,
"margin": -2.9375,
"scores": {
"A": -9.796875,
"B": -5.3828125,
"C": -6.3046875,
"D": -9.1640625,
"E": -8.3203125
}
}
},
{
"ex_id": "aqua-test-181",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.7734375,
"scores": {
"A": -9.6328125,
"B": -8.859375,
"C": -11.828125,
"D": -11.640625,
"E": -10.6875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.5078125,
"scores": {
"A": -7.5546875,
"B": -9.0625,
"C": -10.4453125,
"D": -9.140625,
"E": -9.0078125
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 1.234375,
"scores": {
"A": -9.703125,
"B": -8.46875,
"C": -9.71875,
"D": -10.609375,
"E": -9.875
}
},
"transfer_donor_ex_id": "commonsenseqa-validation-17",
"transfer_donor_gold": "B",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 1.1796875,
"scores": {
"A": -9.7265625,
"B": -8.3984375,
"C": -9.578125,
"D": -10.4609375,
"E": -9.8359375
}
}
},
{
"ex_id": "aqua-test-183",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.40625,
"scores": {
"A": -12.6484375,
"B": -9.3671875,
"C": -10.7734375,
"D": -13.140625,
"E": -13.125
}
},
"ablated_1": {
"pred_label": "C",
"correct": false,
"margin": -0.71875,
"scores": {
"A": -8.6640625,
"B": -7.5078125,
"C": -6.7890625,
"D": -9.3046875,
"E": -10.7578125
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 0.53125,
"scores": {
"A": -10.5625,
"B": -6.87890625,
"C": -7.41015625,
"D": -10.3125,
"E": -11.34375
}
},
"transfer_donor_ex_id": "gsm8k-test-17",
"transfer_donor_gold": "350",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 0.625,
"scores": {
"A": -10.9375,
"B": -7.3046875,
"C": -7.9296875,
"D": -10.609375,
"E": -11.703125
}
}
},
{
"ex_id": "aqua-test-189",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.2109375,
"scores": {
"A": -14.5390625,
"B": -11.546875,
"C": -11.8046875,
"D": -11.7578125,
"E": -13.34375
}
},
"ablated_1": {
"pred_label": "D",
"correct": false,
"margin": -0.84375,
"scores": {
"A": -9.875,
"B": -10.3828125,
"C": -10.28125,
"D": -9.5390625,
"E": -9.5625
}
},
"patched_transfer": {
"pred_label": "C",
"correct": false,
"margin": -0.5546875,
"scores": {
"A": -11.5625,
"B": -9.5625,
"C": -9.0078125,
"D": -9.9375,
"E": -9.8125
}
},
"transfer_donor_ex_id": "commonsenseqa-validation-24",
"transfer_donor_gold": "B",
"patched_self": {
"pred_label": "C",
"correct": false,
"margin": -0.7265625,
"scores": {
"A": -11.609375,
"B": -9.78125,
"C": -9.0546875,
"D": -9.8984375,
"E": -9.7109375
}
}
},
{
"ex_id": "aqua-test-190",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.34375,
"scores": {
"A": -13.6015625,
"B": -10.7734375,
"C": -10.4296875,
"D": -13.3671875,
"E": -13.9296875
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -6.5,
"scores": {
"A": -6.3515625,
"B": -9.2578125,
"C": -12.8515625,
"D": -10.84375,
"E": -12.953125
}
},
"patched_transfer": {
"pred_label": "C",
"correct": true,
"margin": 0.28125,
"scores": {
"A": -10.484375,
"B": -9.234375,
"C": -8.953125,
"D": -10.9375,
"E": -11.5078125
}
},
"transfer_donor_ex_id": "strategyqa-test-41",
"transfer_donor_gold": "NO",
"patched_self": {
"pred_label": "C",
"correct": true,
"margin": 0.2890625,
"scores": {
"A": -10.6328125,
"B": -9.8515625,
"C": -9.5625,
"D": -10.96875,
"E": -11.5
}
}
},
{
"ex_id": "aqua-test-191",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.953125,
"scores": {
"A": -11.890625,
"B": -10.9375,
"C": -13.640625,
"D": -14.109375,
"E": -13.765625
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.09375,
"scores": {
"A": -6.4375,
"B": -8.53125,
"C": -11.7734375,
"D": -11.28125,
"E": -12.3203125
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 1.13671875,
"scores": {
"A": -7.1484375,
"B": -6.01171875,
"C": -7.5625,
"D": -8.875,
"E": -9.078125
}
},
"transfer_donor_ex_id": "gsm8k-test-11",
"transfer_donor_gold": "10",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 1.0859375,
"scores": {
"A": -8.28125,
"B": -7.1953125,
"C": -8.546875,
"D": -9.734375,
"E": -9.796875
}
}
},
{
"ex_id": "aqua-test-206",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.1796875,
"scores": {
"A": -11.6171875,
"B": -10.96875,
"C": -11.1484375,
"D": -12.84375,
"E": -14.0
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.03125,
"scores": {
"A": -8.0,
"B": -11.03125,
"C": -12.328125,
"D": -11.765625,
"E": -14.171875
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 1.1796875,
"scores": {
"A": -10.859375,
"B": -9.6796875,
"C": -11.0546875,
"D": -12.25,
"E": -13.625
}
},
"transfer_donor_ex_id": "strategyqa-test-42",
"transfer_donor_gold": "YES",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 0.8046875,
"scores": {
"A": -11.140625,
"B": -10.3359375,
"C": -11.6484375,
"D": -12.53125,
"E": -14.140625
}
}
},
{
"ex_id": "aqua-test-212",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 1.5078125,
"scores": {
"A": -11.59375,
"B": -9.609375,
"C": -11.1171875,
"D": -11.7421875,
"E": -12.8359375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -3.390625,
"scores": {
"A": -4.984375,
"B": -8.375,
"C": -11.28125,
"D": -9.4140625,
"E": -11.890625
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 1.83984375,
"scores": {
"A": -9.765625,
"B": -6.72265625,
"C": -8.5625,
"D": -9.34375,
"E": -9.75
}
},
"transfer_donor_ex_id": "gsm8k-test-41",
"transfer_donor_gold": "131250",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 1.48828125,
"scores": {
"A": -10.0,
"B": -7.37890625,
"C": -8.8671875,
"D": -9.53125,
"E": -9.8125
}
}
},
{
"ex_id": "aqua-test-223",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 3.09375,
"scores": {
"A": -10.265625,
"B": -7.1484375,
"C": -10.2421875,
"D": -10.921875,
"E": -11.3359375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -4.09765625,
"scores": {
"A": -4.67578125,
"B": -8.7734375,
"C": -12.375,
"D": -11.1796875,
"E": -13.109375
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 2.51171875,
"scores": {
"A": -10.9375,
"B": -5.73828125,
"C": -8.25,
"D": -10.328125,
"E": -10.53125
}
},
"transfer_donor_ex_id": "strategyqa-test-62",
"transfer_donor_gold": "YES",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 2.51171875,
"scores": {
"A": -10.4375,
"B": -5.91015625,
"C": -8.421875,
"D": -9.984375,
"E": -10.328125
}
}
},
{
"ex_id": "aqua-test-228",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.078125,
"scores": {
"A": -13.3125,
"B": -10.71875,
"C": -10.796875,
"D": -14.0859375,
"E": -14.8984375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -1.28125,
"scores": {
"A": -7.171875,
"B": -8.453125,
"C": -9.515625,
"D": -10.125,
"E": -8.953125
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 0.98828125,
"scores": {
"A": -8.703125,
"B": -6.40234375,
"C": -7.390625,
"D": -9.640625,
"E": -9.0546875
}
},
"transfer_donor_ex_id": "gsm8k-test-18",
"transfer_donor_gold": "260",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 0.796875,
"scores": {
"A": -8.9296875,
"B": -6.5703125,
"C": -7.3671875,
"D": -9.640625,
"E": -9.15625
}
}
},
{
"ex_id": "aqua-test-249",
"gold": "C",
"baseline": {
"pred_label": "C",
"correct": true,
"margin": 0.828125,
"scores": {
"A": -10.328125,
"B": -8.859375,
"C": -8.03125,
"D": -11.640625,
"E": -11.109375
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.59375,
"scores": {
"A": -5.9921875,
"B": -7.609375,
"C": -8.5859375,
"D": -8.4375,
"E": -8.6796875
}
},
"patched_transfer": {
"pred_label": "B",
"correct": false,
"margin": -0.4375,
"scores": {
"A": -8.703125,
"B": -6.5078125,
"C": -6.9453125,
"D": -9.828125,
"E": -9.34375
}
},
"transfer_donor_ex_id": "gsm8k-test-5",
"transfer_donor_gold": "168",
"patched_self": {
"pred_label": "B",
"correct": false,
"margin": -0.34375,
"scores": {
"A": -8.9921875,
"B": -6.984375,
"C": -7.328125,
"D": -10.1171875,
"E": -9.765625
}
}
},
{
"ex_id": "aqua-test-251",
"gold": "B",
"baseline": {
"pred_label": "B",
"correct": true,
"margin": 0.078125,
"scores": {
"A": -9.90625,
"B": -9.828125,
"C": -12.40625,
"D": -11.265625,
"E": -10.703125
}
},
"ablated_1": {
"pred_label": "A",
"correct": false,
"margin": -2.984375,
"scores": {
"A": -9.296875,
"B": -12.28125,
"C": -13.5,
"D": -12.09375,
"E": -11.140625
}
},
"patched_transfer": {
"pred_label": "B",
"correct": true,
"margin": 0.4296875,
"scores": {
"A": -8.9765625,
"B": -8.546875,
"C": -10.03125,
"D": -10.625,
"E": -9.125
}
},
"transfer_donor_ex_id": "gsm8k-test-45",
"transfer_donor_gold": "11",
"patched_self": {
"pred_label": "B",
"correct": true,
"margin": 0.53125,
"scores": {
"A": -9.375,
"B": -8.8203125,
"C": -10.15625,
"D": -10.6484375,
"E": -9.3515625
}
}
}
]
}