| { |
| "meta": { |
| "model": "meta-llama/Llama-2-7b-chat-hf", |
| "device": "cuda", |
| "dtype": "fp16", |
| "layer": 10, |
| "layers_path": "model.layers", |
| "task": "aqua", |
| "eval_meta": { |
| "subspace_split": null, |
| "eval_split": "test", |
| "available_splits": [ |
| "train", |
| "test", |
| "validation" |
| ], |
| "hf_id": "aqua_rat", |
| "options_prefix_stripped": true, |
| "force_answer_prefix": true |
| }, |
| "seed": 123, |
| "candidate_labels": [ |
| "A", |
| "B", |
| "C", |
| "D", |
| "E" |
| ], |
| "candidate_text_style": "space_letter", |
| "candidate_token_lens": { |
| "A": 2, |
| "B": 2, |
| "C": 2, |
| "D": 2, |
| "E": 2 |
| }, |
| "max_candidate_token_len": 2, |
| "Qs_path": "Q_shared_layer10.npy", |
| "Qs_shape": [ |
| 4096, |
| 97 |
| ], |
| "flipset_definition": { |
| "alpha": 1.0, |
| "criterion": "baseline correct AND ablated(alpha=1) wrong", |
| "n_eval_loaded": 254, |
| "flipset_total": 42, |
| "flipset_used": 42 |
| }, |
| "alpha_sweep": { |
| "enabled": true, |
| "alpha_list": [ |
| 0.0, |
| 0.05, |
| 0.1, |
| 0.2, |
| 0.5, |
| 1.0 |
| ] |
| }, |
| "transfer_patching": { |
| "enabled": true, |
| "patch_window_requested": "steps_0", |
| "patch_steps_requested": [ |
| 0 |
| ], |
| "patch_steps_final": [ |
| 0 |
| ], |
| "run_self_patch_ref": true |
| } |
| }, |
| "scan_rows": [ |
| { |
| "ex_id": "aqua-test-0", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.5390625, |
| "scores": { |
| "A": -9.953125, |
| "B": -9.9296875, |
| "C": -9.390625, |
| "D": -11.7421875, |
| "E": -11.375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.2421875, |
| "scores": { |
| "A": -9.1796875, |
| "B": -10.421875, |
| "C": -11.0546875, |
| "D": -11.21875, |
| "E": -11.1171875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-1", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.0234375, |
| "scores": { |
| "A": -12.90625, |
| "B": -11.53125, |
| "C": -11.5546875, |
| "D": -13.53125, |
| "E": -13.296875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.46875, |
| "scores": { |
| "A": -6.5625, |
| "B": -11.109375, |
| "C": -12.03125, |
| "D": -11.03125, |
| "E": -13.0 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-2", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2578125, |
| "scores": { |
| "A": -11.234375, |
| "B": -10.2109375, |
| "C": -13.171875, |
| "D": -12.4453125, |
| "E": -10.46875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.953125, |
| "scores": { |
| "A": -6.0625, |
| "B": -14.015625, |
| "C": -17.125, |
| "D": -15.2734375, |
| "E": -15.640625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-3", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.1171875, |
| "scores": { |
| "A": -12.6796875, |
| "B": -8.5078125, |
| "C": -9.625, |
| "D": -12.859375, |
| "E": -15.15625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.0546875, |
| "scores": { |
| "A": -9.75, |
| "B": -9.1875, |
| "C": -10.2421875, |
| "D": -11.046875, |
| "E": -10.8984375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-4", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.8984375, |
| "scores": { |
| "A": -10.921875, |
| "B": -11.8203125, |
| "C": -13.1171875, |
| "D": -12.671875, |
| "E": -12.2578125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.8125, |
| "scores": { |
| "A": -8.171875, |
| "B": -10.75, |
| "C": -11.90625, |
| "D": -9.984375, |
| "E": -10.890625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-5", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953125, |
| "scores": { |
| "A": -11.9921875, |
| "B": -10.9765625, |
| "C": -12.0390625, |
| "D": -11.9609375, |
| "E": -11.9296875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.1640625, |
| "scores": { |
| "A": -7.59375, |
| "B": -9.7578125, |
| "C": -11.0234375, |
| "D": -9.1953125, |
| "E": -10.0625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-6", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.40625, |
| "scores": { |
| "A": -10.84375, |
| "B": -12.203125, |
| "C": -13.25, |
| "D": -14.3125, |
| "E": -12.21875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.515625, |
| "scores": { |
| "A": -7.015625, |
| "B": -9.8671875, |
| "C": -12.53125, |
| "D": -11.03125, |
| "E": -11.0078125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-7", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.171875, |
| "scores": { |
| "A": -10.5625, |
| "B": -9.359375, |
| "C": -8.96875, |
| "D": -11.140625, |
| "E": -10.96875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.88671875, |
| "scores": { |
| "A": -6.71484375, |
| "B": -10.6875, |
| "C": -10.5546875, |
| "D": -10.6015625, |
| "E": -12.171875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-8", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.28125, |
| "scores": { |
| "A": -13.03125, |
| "B": -11.890625, |
| "C": -14.171875, |
| "D": -12.390625, |
| "E": -14.484375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -8.05859375, |
| "scores": { |
| "A": -6.81640625, |
| "B": -11.90625, |
| "C": -14.875, |
| "D": -11.125, |
| "E": -12.953125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-9", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.078125, |
| "scores": { |
| "A": -11.265625, |
| "B": -8.890625, |
| "C": -9.96875, |
| "D": -12.359375, |
| "E": -13.9921875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.01953125, |
| "scores": { |
| "A": -7.32421875, |
| "B": -11.34375, |
| "C": -11.5, |
| "D": -13.6875, |
| "E": -15.28125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-10", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.21875, |
| "scores": { |
| "A": -11.90625, |
| "B": -10.2421875, |
| "C": -13.296875, |
| "D": -11.453125, |
| "E": -10.4609375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.09375, |
| "scores": { |
| "A": -7.1796875, |
| "B": -11.0390625, |
| "C": -14.765625, |
| "D": -12.578125, |
| "E": -14.2734375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-11", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.0390625, |
| "scores": { |
| "A": -11.6875, |
| "B": -10.359375, |
| "C": -12.546875, |
| "D": -12.5859375, |
| "E": -12.3984375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.6796875, |
| "scores": { |
| "A": -6.9609375, |
| "B": -10.3046875, |
| "C": -13.40625, |
| "D": -11.125, |
| "E": -13.640625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-12", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.484375, |
| "scores": { |
| "A": -12.78125, |
| "B": -9.015625, |
| "C": -11.5, |
| "D": -11.5625, |
| "E": -13.0 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.1875, |
| "scores": { |
| "A": -8.7890625, |
| "B": -9.625, |
| "C": -11.9765625, |
| "D": -8.7890625, |
| "E": -11.3984375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-13", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.7890625, |
| "scores": { |
| "A": -12.8515625, |
| "B": -9.4609375, |
| "C": -9.546875, |
| "D": -12.25, |
| "E": -12.8671875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.83984375, |
| "scores": { |
| "A": -6.23828125, |
| "B": -9.609375, |
| "C": -8.1015625, |
| "D": -10.078125, |
| "E": -13.046875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-14", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.625, |
| "scores": { |
| "A": -12.59375, |
| "B": -9.078125, |
| "C": -10.5390625, |
| "D": -10.703125, |
| "E": -9.5078125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.44921875, |
| "scores": { |
| "A": -7.1328125, |
| "B": -8.40625, |
| "C": -9.1015625, |
| "D": -6.68359375, |
| "E": -8.6953125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-15", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.671875, |
| "scores": { |
| "A": -11.078125, |
| "B": -10.40625, |
| "C": -13.625, |
| "D": -15.3125, |
| "E": -13.8125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.7421875, |
| "scores": { |
| "A": -6.1171875, |
| "B": -10.859375, |
| "C": -11.296875, |
| "D": -11.0625, |
| "E": -13.578125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-16", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 2.80078125, |
| "scores": { |
| "A": -12.484375, |
| "B": -10.515625, |
| "C": -7.71484375, |
| "D": -12.859375, |
| "E": -12.8125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.87109375, |
| "scores": { |
| "A": -7.82421875, |
| "B": -9.453125, |
| "C": -8.6953125, |
| "D": -9.59375, |
| "E": -11.6953125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-17", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.0625, |
| "scores": { |
| "A": -11.5625, |
| "B": -10.5, |
| "C": -12.515625, |
| "D": -12.8125, |
| "E": -12.875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 4.5, |
| "scores": { |
| "A": -7.1796875, |
| "B": -12.2578125, |
| "C": -14.125, |
| "D": -11.6796875, |
| "E": -15.5078125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-18", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.3984375, |
| "scores": { |
| "A": -11.515625, |
| "B": -10.9765625, |
| "C": -13.96875, |
| "D": -12.375, |
| "E": -12.171875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.6171875, |
| "scores": { |
| "A": -7.203125, |
| "B": -10.859375, |
| "C": -14.8125, |
| "D": -11.8203125, |
| "E": -15.4921875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-19", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.78125, |
| "scores": { |
| "A": -9.515625, |
| "B": -9.0625, |
| "C": -8.734375, |
| "D": -10.546875, |
| "E": -11.125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 3.40234375, |
| "scores": { |
| "A": -5.53515625, |
| "B": -9.8203125, |
| "C": -8.9375, |
| "D": -10.78125, |
| "E": -11.8828125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-20", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.84375, |
| "scores": { |
| "A": -10.1875, |
| "B": -9.6875, |
| "C": -9.8125, |
| "D": -11.3125, |
| "E": -11.53125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.8046875, |
| "scores": { |
| "A": -6.5234375, |
| "B": -9.34375, |
| "C": -12.984375, |
| "D": -10.1328125, |
| "E": -14.328125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-21", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.765625, |
| "scores": { |
| "A": -10.4140625, |
| "B": -9.6484375, |
| "C": -12.5546875, |
| "D": -12.234375, |
| "E": -11.3828125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.5390625, |
| "scores": { |
| "A": -6.1328125, |
| "B": -9.671875, |
| "C": -10.8046875, |
| "D": -10.671875, |
| "E": -11.2109375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-22", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.96875, |
| "scores": { |
| "A": -9.7734375, |
| "B": -8.546875, |
| "C": -11.7734375, |
| "D": -12.625, |
| "E": -11.515625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.88671875, |
| "scores": { |
| "A": -6.34765625, |
| "B": -9.96875, |
| "C": -12.5625, |
| "D": -13.21875, |
| "E": -14.234375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-23", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.125, |
| "scores": { |
| "A": -9.890625, |
| "B": -8.765625, |
| "C": -10.03125, |
| "D": -11.890625, |
| "E": -10.671875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.6796875, |
| "scores": { |
| "A": -6.0859375, |
| "B": -10.3125, |
| "C": -12.046875, |
| "D": -12.765625, |
| "E": -13.46875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-24", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.5390625, |
| "scores": { |
| "A": -11.6328125, |
| "B": -10.71875, |
| "C": -13.28125, |
| "D": -11.4296875, |
| "E": -11.2578125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.109375, |
| "scores": { |
| "A": -7.28125, |
| "B": -7.6171875, |
| "C": -9.4375, |
| "D": -7.8515625, |
| "E": -7.390625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-25", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.0546875, |
| "scores": { |
| "A": -12.953125, |
| "B": -12.2578125, |
| "C": -12.203125, |
| "D": -12.4140625, |
| "E": -13.6328125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.5625, |
| "scores": { |
| "A": -8.6484375, |
| "B": -10.8828125, |
| "C": -10.2109375, |
| "D": -9.4609375, |
| "E": -10.859375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-26", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -3.8125, |
| "scores": { |
| "A": -12.578125, |
| "B": -10.75, |
| "C": -8.765625, |
| "D": -12.953125, |
| "E": -10.875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.6015625, |
| "scores": { |
| "A": -7.703125, |
| "B": -10.7890625, |
| "C": -8.3046875, |
| "D": -9.640625, |
| "E": -11.390625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-27", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.8125, |
| "scores": { |
| "A": -10.90625, |
| "B": -9.609375, |
| "C": -10.671875, |
| "D": -12.421875, |
| "E": -12.0 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.328125, |
| "scores": { |
| "A": -8.1640625, |
| "B": -10.15625, |
| "C": -11.375, |
| "D": -10.4921875, |
| "E": -11.28125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-28", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.3359375, |
| "scores": { |
| "A": -10.4296875, |
| "B": -11.765625, |
| "C": -12.7265625, |
| "D": -13.0390625, |
| "E": -13.0546875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 4.234375, |
| "scores": { |
| "A": -9.5234375, |
| "B": -13.7578125, |
| "C": -14.6015625, |
| "D": -14.578125, |
| "E": -13.765625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-29", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.6875, |
| "scores": { |
| "A": -12.921875, |
| "B": -10.234375, |
| "C": -10.7578125, |
| "D": -10.984375, |
| "E": -12.3828125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.3828125, |
| "scores": { |
| "A": -8.4296875, |
| "B": -10.6015625, |
| "C": -11.890625, |
| "D": -9.8125, |
| "E": -11.0 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-30", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -4.3203125, |
| "scores": { |
| "A": -11.671875, |
| "B": -11.4140625, |
| "C": -11.09375, |
| "D": -15.4140625, |
| "E": -11.8359375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.8203125, |
| "scores": { |
| "A": -6.5546875, |
| "B": -12.5703125, |
| "C": -14.359375, |
| "D": -13.375, |
| "E": -12.265625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-31", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.5078125, |
| "scores": { |
| "A": -10.9765625, |
| "B": -9.859375, |
| "C": -10.3671875, |
| "D": -10.8515625, |
| "E": -11.3671875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.296875, |
| "scores": { |
| "A": -6.6328125, |
| "B": -10.09375, |
| "C": -11.9296875, |
| "D": -10.46875, |
| "E": -9.6484375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-32", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.453125, |
| "scores": { |
| "A": -12.71875, |
| "B": -10.984375, |
| "C": -10.53125, |
| "D": -12.484375, |
| "E": -11.609375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.62109375, |
| "scores": { |
| "A": -6.75390625, |
| "B": -10.375, |
| "C": -10.203125, |
| "D": -12.796875, |
| "E": -13.765625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-33", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.1875, |
| "scores": { |
| "A": -17.28125, |
| "B": -18.1875, |
| "C": -16.09375, |
| "D": -19.15625, |
| "E": -19.46875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.4375, |
| "scores": { |
| "A": -9.125, |
| "B": -10.1171875, |
| "C": -9.5625, |
| "D": -10.0703125, |
| "E": -10.4921875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-34", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.9375, |
| "scores": { |
| "A": -14.8359375, |
| "B": -11.34375, |
| "C": -14.7109375, |
| "D": -14.28125, |
| "E": -15.34375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.2890625, |
| "scores": { |
| "A": -9.546875, |
| "B": -11.46875, |
| "C": -14.8046875, |
| "D": -14.8359375, |
| "E": -18.359375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-35", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.046875, |
| "scores": { |
| "A": -11.9375, |
| "B": -10.578125, |
| "C": -11.625, |
| "D": -11.265625, |
| "E": -11.609375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.0078125, |
| "scores": { |
| "A": -8.0625, |
| "B": -10.21875, |
| "C": -10.0703125, |
| "D": -9.2890625, |
| "E": -9.390625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-36", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.734375, |
| "scores": { |
| "A": -9.8984375, |
| "B": -10.4921875, |
| "C": -10.7421875, |
| "D": -12.6953125, |
| "E": -11.6328125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.5546875, |
| "scores": { |
| "A": -8.3046875, |
| "B": -12.734375, |
| "C": -12.671875, |
| "D": -14.5625, |
| "E": -14.859375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-37", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.0, |
| "scores": { |
| "A": -11.25, |
| "B": -10.5078125, |
| "C": -13.328125, |
| "D": -12.7734375, |
| "E": -11.5078125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.84375, |
| "scores": { |
| "A": -9.515625, |
| "B": -10.703125, |
| "C": -12.234375, |
| "D": -10.921875, |
| "E": -10.359375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-38", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.59375, |
| "scores": { |
| "A": -13.921875, |
| "B": -12.09375, |
| "C": -17.015625, |
| "D": -16.796875, |
| "E": -13.6875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.6328125, |
| "scores": { |
| "A": -8.9765625, |
| "B": -11.46875, |
| "C": -14.421875, |
| "D": -12.515625, |
| "E": -12.609375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-39", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.71875, |
| "scores": { |
| "A": -10.2265625, |
| "B": -11.9453125, |
| "C": -12.1484375, |
| "D": -14.3125, |
| "E": -14.015625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.390625, |
| "scores": { |
| "A": -10.234375, |
| "B": -10.1875, |
| "C": -9.84375, |
| "D": -11.59375, |
| "E": -10.8515625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-40", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.1875, |
| "scores": { |
| "A": -9.90625, |
| "B": -8.71875, |
| "C": -8.9375, |
| "D": -11.40625, |
| "E": -10.3125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 2.8828125, |
| "scores": { |
| "A": -6.9609375, |
| "B": -10.8671875, |
| "C": -10.4765625, |
| "D": -9.84375, |
| "E": -9.953125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-41", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.484375, |
| "scores": { |
| "A": -13.234375, |
| "B": -12.765625, |
| "C": -14.671875, |
| "D": -16.25, |
| "E": -16.5 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.0859375, |
| "scores": { |
| "A": -8.6328125, |
| "B": -9.375, |
| "C": -10.65625, |
| "D": -10.71875, |
| "E": -10.390625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-42", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.96875, |
| "scores": { |
| "A": -14.828125, |
| "B": -12.5, |
| "C": -12.890625, |
| "D": -13.46875, |
| "E": -14.859375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.40234375, |
| "scores": { |
| "A": -6.67578125, |
| "B": -11.125, |
| "C": -12.828125, |
| "D": -10.078125, |
| "E": -12.3203125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-43", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.734375, |
| "scores": { |
| "A": -12.140625, |
| "B": -11.703125, |
| "C": -11.390625, |
| "D": -12.125, |
| "E": -13.984375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.8125, |
| "scores": { |
| "A": -5.0, |
| "B": -7.875, |
| "C": -9.0546875, |
| "D": -6.8125, |
| "E": -9.6015625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-44", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.390625, |
| "scores": { |
| "A": -10.1171875, |
| "B": -10.5078125, |
| "C": -10.5078125, |
| "D": -12.1953125, |
| "E": -15.0 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 3.46875, |
| "scores": { |
| "A": -7.4375, |
| "B": -11.4375, |
| "C": -14.21875, |
| "D": -10.90625, |
| "E": -14.3984375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-45", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -1.0546875, |
| "scores": { |
| "A": -12.984375, |
| "B": -12.3359375, |
| "C": -11.9296875, |
| "D": -12.359375, |
| "E": -12.4921875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 2.30078125, |
| "scores": { |
| "A": -7.07421875, |
| "B": -9.375, |
| "C": -10.578125, |
| "D": -9.984375, |
| "E": -12.9375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-46", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.796875, |
| "scores": { |
| "A": -12.9609375, |
| "B": -11.390625, |
| "C": -11.625, |
| "D": -11.90625, |
| "E": -12.1875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.734375, |
| "scores": { |
| "A": -7.03125, |
| "B": -8.359375, |
| "C": -10.5, |
| "D": -8.4453125, |
| "E": -8.765625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-47", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.203125, |
| "scores": { |
| "A": -11.9453125, |
| "B": -12.5, |
| "C": -12.1171875, |
| "D": -13.046875, |
| "E": -11.7421875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.0390625, |
| "scores": { |
| "A": -9.3828125, |
| "B": -11.8515625, |
| "C": -13.359375, |
| "D": -12.15625, |
| "E": -13.421875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-48", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.296875, |
| "scores": { |
| "A": -14.109375, |
| "B": -14.640625, |
| "C": -9.0625, |
| "D": -10.3125, |
| "E": -11.359375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -1.5078125, |
| "scores": { |
| "A": -7.671875, |
| "B": -8.9296875, |
| "C": -6.4140625, |
| "D": -7.8203125, |
| "E": -7.921875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-49", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -4.125, |
| "scores": { |
| "A": -15.953125, |
| "B": -11.828125, |
| "C": -14.90625, |
| "D": -13.234375, |
| "E": -13.984375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.09375, |
| "scores": { |
| "A": -8.953125, |
| "B": -10.046875, |
| "C": -13.53125, |
| "D": -10.265625, |
| "E": -12.515625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-50", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.2890625, |
| "scores": { |
| "A": -9.515625, |
| "B": -8.5703125, |
| "C": -9.984375, |
| "D": -10.859375, |
| "E": -10.7265625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.7578125, |
| "scores": { |
| "A": -7.0546875, |
| "B": -10.078125, |
| "C": -12.5625, |
| "D": -10.8125, |
| "E": -13.34375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-51", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.25, |
| "scores": { |
| "A": -11.4375, |
| "B": -9.515625, |
| "C": -9.6171875, |
| "D": -12.765625, |
| "E": -11.96875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.03125, |
| "scores": { |
| "A": -5.234375, |
| "B": -8.6015625, |
| "C": -12.21875, |
| "D": -10.265625, |
| "E": -11.484375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-52", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.0625, |
| "scores": { |
| "A": -12.890625, |
| "B": -9.8515625, |
| "C": -9.9140625, |
| "D": -11.515625, |
| "E": -10.6875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.4765625, |
| "scores": { |
| "A": -5.1328125, |
| "B": -5.609375, |
| "C": -6.609375, |
| "D": -6.8984375, |
| "E": -6.4296875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-53", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -4.0234375, |
| "scores": { |
| "A": -12.7109375, |
| "B": -12.4140625, |
| "C": -8.390625, |
| "D": -12.4140625, |
| "E": -13.4609375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.78125, |
| "scores": { |
| "A": -4.9375, |
| "B": -8.796875, |
| "C": -8.203125, |
| "D": -9.71875, |
| "E": -10.1171875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-54", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.21875, |
| "scores": { |
| "A": -12.4140625, |
| "B": -12.7578125, |
| "C": -11.3046875, |
| "D": -13.5234375, |
| "E": -11.6484375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.5625, |
| "scores": { |
| "A": -7.515625, |
| "B": -12.953125, |
| "C": -11.71875, |
| "D": -13.078125, |
| "E": -12.3203125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-55", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.859375, |
| "scores": { |
| "A": -10.5, |
| "B": -10.5703125, |
| "C": -9.7109375, |
| "D": -13.1640625, |
| "E": -11.4921875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.265625, |
| "scores": { |
| "A": -8.015625, |
| "B": -10.28125, |
| "C": -11.234375, |
| "D": -11.296875, |
| "E": -13.421875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-56", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -1.5703125, |
| "scores": { |
| "A": -10.5859375, |
| "B": -11.296875, |
| "C": -12.3671875, |
| "D": -12.1328125, |
| "E": -10.5625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.2734375, |
| "scores": { |
| "A": -7.0625, |
| "B": -8.96875, |
| "C": -10.53125, |
| "D": -8.3359375, |
| "E": -9.71875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-57", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.15625, |
| "scores": { |
| "A": -13.875, |
| "B": -12.96875, |
| "C": -14.359375, |
| "D": -14.140625, |
| "E": -13.125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.640625, |
| "scores": { |
| "A": -10.3125, |
| "B": -12.953125, |
| "C": -12.8203125, |
| "D": -12.8359375, |
| "E": -12.5078125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-58", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.4296875, |
| "scores": { |
| "A": -15.53125, |
| "B": -10.21875, |
| "C": -13.6484375, |
| "D": -15.59375, |
| "E": -14.75 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -6.56640625, |
| "scores": { |
| "A": -10.90625, |
| "B": -7.32421875, |
| "C": -13.890625, |
| "D": -11.015625, |
| "E": -13.09375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-59", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.03125, |
| "scores": { |
| "A": -9.5546875, |
| "B": -8.5234375, |
| "C": -9.390625, |
| "D": -11.921875, |
| "E": -12.15625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 2.609375, |
| "scores": { |
| "A": -7.515625, |
| "B": -10.125, |
| "C": -10.609375, |
| "D": -12.1328125, |
| "E": -13.125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-60", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -4.2734375, |
| "scores": { |
| "A": -9.9921875, |
| "B": -9.9765625, |
| "C": -10.4921875, |
| "D": -14.25, |
| "E": -14.8125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.6796875, |
| "scores": { |
| "A": -5.640625, |
| "B": -9.828125, |
| "C": -11.71875, |
| "D": -12.3203125, |
| "E": -13.484375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-61", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.4921875, |
| "scores": { |
| "A": -11.6875, |
| "B": -13.140625, |
| "C": -15.5078125, |
| "D": -12.734375, |
| "E": -13.1796875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.265625, |
| "scores": { |
| "A": -7.3671875, |
| "B": -8.8828125, |
| "C": -12.46875, |
| "D": -8.6640625, |
| "E": -9.6328125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-62", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -4.09375, |
| "scores": { |
| "A": -12.8203125, |
| "B": -9.875, |
| "C": -8.7265625, |
| "D": -13.2578125, |
| "E": -14.109375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 2.9140625, |
| "scores": { |
| "A": -5.859375, |
| "B": -8.7734375, |
| "C": -8.9140625, |
| "D": -11.6953125, |
| "E": -13.875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-63", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.8203125, |
| "scores": { |
| "A": -12.9921875, |
| "B": -11.171875, |
| "C": -12.375, |
| "D": -15.0625, |
| "E": -14.8125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 6.546875, |
| "scores": { |
| "A": -9.703125, |
| "B": -16.796875, |
| "C": -17.53125, |
| "D": -16.25, |
| "E": -17.90625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-64", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.109375, |
| "scores": { |
| "A": -11.2578125, |
| "B": -9.859375, |
| "C": -9.96875, |
| "D": -11.4765625, |
| "E": -13.296875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.38671875, |
| "scores": { |
| "A": -7.83203125, |
| "B": -8.5, |
| "C": -10.21875, |
| "D": -9.90625, |
| "E": -10.09375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-65", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.8125, |
| "scores": { |
| "A": -11.46875, |
| "B": -10.65625, |
| "C": -11.546875, |
| "D": -11.28125, |
| "E": -12.859375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 2.3515625, |
| "scores": { |
| "A": -8.5, |
| "B": -10.8515625, |
| "C": -13.46875, |
| "D": -12.703125, |
| "E": -12.703125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-66", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.1328125, |
| "scores": { |
| "A": -12.5234375, |
| "B": -10.875, |
| "C": -12.2421875, |
| "D": -11.890625, |
| "E": -13.0078125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.2734375, |
| "scores": { |
| "A": -8.6328125, |
| "B": -10.921875, |
| "C": -14.421875, |
| "D": -10.703125, |
| "E": -11.90625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-67", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.359375, |
| "scores": { |
| "A": -8.828125, |
| "B": -8.546875, |
| "C": -9.703125, |
| "D": -9.90625, |
| "E": -10.28125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.71875, |
| "scores": { |
| "A": -6.6328125, |
| "B": -11.28125, |
| "C": -13.203125, |
| "D": -11.3515625, |
| "E": -13.6015625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-68", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6875, |
| "scores": { |
| "A": -11.65625, |
| "B": -10.96875, |
| "C": -11.875, |
| "D": -12.078125, |
| "E": -12.640625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.1953125, |
| "scores": { |
| "A": -9.03125, |
| "B": -11.2265625, |
| "C": -11.265625, |
| "D": -11.5234375, |
| "E": -10.6171875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-69", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.5, |
| "scores": { |
| "A": -12.7578125, |
| "B": -10.59375, |
| "C": -11.09375, |
| "D": -14.09375, |
| "E": -13.5859375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.9453125, |
| "scores": { |
| "A": -6.21875, |
| "B": -9.140625, |
| "C": -13.1640625, |
| "D": -11.9453125, |
| "E": -11.6796875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-70", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.75, |
| "scores": { |
| "A": -12.375, |
| "B": -8.625, |
| "C": -13.84375, |
| "D": -10.7578125, |
| "E": -13.421875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.03125, |
| "scores": { |
| "A": -7.08203125, |
| "B": -7.05078125, |
| "C": -11.171875, |
| "D": -7.4140625, |
| "E": -10.75 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-71", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -3.890625, |
| "scores": { |
| "A": -14.125, |
| "B": -15.40625, |
| "C": -10.234375, |
| "D": -13.78125, |
| "E": -11.53125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -0.4140625, |
| "scores": { |
| "A": -8.5703125, |
| "B": -12.78125, |
| "C": -9.265625, |
| "D": -9.6484375, |
| "E": -8.15625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-72", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.359375, |
| "scores": { |
| "A": -14.15625, |
| "B": -12.328125, |
| "C": -13.015625, |
| "D": -13.453125, |
| "E": -13.6875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.46875, |
| "scores": { |
| "A": -8.6796875, |
| "B": -9.1640625, |
| "C": -10.9140625, |
| "D": -9.5625, |
| "E": -10.1484375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-73", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -6.7265625, |
| "scores": { |
| "A": -15.75, |
| "B": -13.71875, |
| "C": -10.2265625, |
| "D": -14.84375, |
| "E": -16.953125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -4.62109375, |
| "scores": { |
| "A": -7.96875, |
| "B": -8.640625, |
| "C": -5.91015625, |
| "D": -10.078125, |
| "E": -10.53125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-74", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -1.40625, |
| "scores": { |
| "A": -12.734375, |
| "B": -11.671875, |
| "C": -11.328125, |
| "D": -13.28125, |
| "E": -14.6875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.25, |
| "scores": { |
| "A": -7.734375, |
| "B": -9.125, |
| "C": -7.984375, |
| "D": -8.2421875, |
| "E": -8.78125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-75", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.4375, |
| "scores": { |
| "A": -12.078125, |
| "B": -8.640625, |
| "C": -10.578125, |
| "D": -12.375, |
| "E": -13.671875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 4.6328125, |
| "scores": { |
| "A": -6.7578125, |
| "B": -11.390625, |
| "C": -13.0234375, |
| "D": -11.796875, |
| "E": -13.3125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-76", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.5703125, |
| "scores": { |
| "A": -13.40625, |
| "B": -11.0234375, |
| "C": -11.59375, |
| "D": -15.15625, |
| "E": -14.484375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.0078125, |
| "scores": { |
| "A": -10.6640625, |
| "B": -12.421875, |
| "C": -12.671875, |
| "D": -12.859375, |
| "E": -14.03125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-77", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.890625, |
| "scores": { |
| "A": -12.28125, |
| "B": -10.5, |
| "C": -9.2265625, |
| "D": -12.1171875, |
| "E": -14.09375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.109375, |
| "scores": { |
| "A": -6.25, |
| "B": -8.390625, |
| "C": -11.625, |
| "D": -11.359375, |
| "E": -12.09375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-78", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.078125, |
| "scores": { |
| "A": -12.7890625, |
| "B": -8.3203125, |
| "C": -11.3984375, |
| "D": -13.765625, |
| "E": -13.84375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.63671875, |
| "scores": { |
| "A": -5.62890625, |
| "B": -10.265625, |
| "C": -11.3125, |
| "D": -12.078125, |
| "E": -12.4296875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-79", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.65625, |
| "scores": { |
| "A": -11.9375, |
| "B": -11.28125, |
| "C": -12.421875, |
| "D": -11.671875, |
| "E": -12.890625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.09375, |
| "scores": { |
| "A": -7.03125, |
| "B": -8.3359375, |
| "C": -10.1875, |
| "D": -8.125, |
| "E": -9.96875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-80", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.5, |
| "scores": { |
| "A": -10.703125, |
| "B": -9.203125, |
| "C": -10.328125, |
| "D": -10.4375, |
| "E": -11.28125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.1484375, |
| "scores": { |
| "A": -6.0, |
| "B": -7.1484375, |
| "C": -8.6328125, |
| "D": -7.5234375, |
| "E": -7.953125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-81", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.7265625, |
| "scores": { |
| "A": -12.1875, |
| "B": -11.984375, |
| "C": -10.6015625, |
| "D": -14.203125, |
| "E": -13.328125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.7109375, |
| "scores": { |
| "A": -10.2890625, |
| "B": -13.0390625, |
| "C": -12.3828125, |
| "D": -12.15625, |
| "E": -13.0 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-82", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.78125, |
| "scores": { |
| "A": -12.3046875, |
| "B": -9.84375, |
| "C": -12.625, |
| "D": -14.0078125, |
| "E": -11.7890625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.359375, |
| "scores": { |
| "A": -6.59375, |
| "B": -9.5390625, |
| "C": -10.953125, |
| "D": -10.6875, |
| "E": -9.8828125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-83", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -1.921875, |
| "scores": { |
| "A": -10.3046875, |
| "B": -9.1640625, |
| "C": -8.2421875, |
| "D": -10.1640625, |
| "E": -9.40625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.078125, |
| "scores": { |
| "A": -5.59375, |
| "B": -7.6640625, |
| "C": -9.4296875, |
| "D": -8.671875, |
| "E": -10.1015625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-84", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.3828125, |
| "scores": { |
| "A": -11.46875, |
| "B": -10.9765625, |
| "C": -9.0859375, |
| "D": -9.7890625, |
| "E": -10.390625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.09375, |
| "scores": { |
| "A": -9.6875, |
| "B": -11.546875, |
| "C": -11.609375, |
| "D": -10.78125, |
| "E": -11.5859375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-85", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.0078125, |
| "scores": { |
| "A": -11.0234375, |
| "B": -9.9296875, |
| "C": -9.015625, |
| "D": -13.453125, |
| "E": -13.5703125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 3.6484375, |
| "scores": { |
| "A": -6.2265625, |
| "B": -10.890625, |
| "C": -9.875, |
| "D": -11.765625, |
| "E": -14.03125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-86", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.75, |
| "scores": { |
| "A": -10.234375, |
| "B": -8.484375, |
| "C": -10.546875, |
| "D": -12.03125, |
| "E": -11.203125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.78125, |
| "scores": { |
| "A": -9.25, |
| "B": -11.03125, |
| "C": -12.28125, |
| "D": -12.1875, |
| "E": -11.21875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-87", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.625, |
| "scores": { |
| "A": -9.7890625, |
| "B": -9.1640625, |
| "C": -11.234375, |
| "D": -12.0, |
| "E": -11.46875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.90625, |
| "scores": { |
| "A": -6.578125, |
| "B": -9.484375, |
| "C": -9.703125, |
| "D": -9.1875, |
| "E": -11.25 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-88", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.40625, |
| "scores": { |
| "A": -14.6796875, |
| "B": -12.328125, |
| "C": -13.0625, |
| "D": -13.9765625, |
| "E": -13.734375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.9375, |
| "scores": { |
| "A": -5.40625, |
| "B": -6.2578125, |
| "C": -7.46875, |
| "D": -9.0390625, |
| "E": -10.34375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-89", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.02734375, |
| "scores": { |
| "A": -10.921875, |
| "B": -7.89453125, |
| "C": -10.65625, |
| "D": -12.421875, |
| "E": -11.3125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.75, |
| "scores": { |
| "A": -6.359375, |
| "B": -8.109375, |
| "C": -11.4140625, |
| "D": -9.7109375, |
| "E": -9.2890625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-90", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.5, |
| "scores": { |
| "A": -11.0234375, |
| "B": -10.5234375, |
| "C": -12.0234375, |
| "D": -12.125, |
| "E": -13.828125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.6015625, |
| "scores": { |
| "A": -8.1796875, |
| "B": -9.109375, |
| "C": -10.78125, |
| "D": -9.5625, |
| "E": -10.078125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-91", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.390625, |
| "scores": { |
| "A": -12.015625, |
| "B": -10.046875, |
| "C": -11.4375, |
| "D": -12.140625, |
| "E": -10.671875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.6328125, |
| "scores": { |
| "A": -8.03125, |
| "B": -8.96875, |
| "C": -9.6640625, |
| "D": -9.765625, |
| "E": -9.015625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-92", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.359375, |
| "scores": { |
| "A": -12.046875, |
| "B": -12.40625, |
| "C": -12.75, |
| "D": -14.078125, |
| "E": -14.9375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.984375, |
| "scores": { |
| "A": -11.0234375, |
| "B": -15.0078125, |
| "C": -15.1640625, |
| "D": -13.609375, |
| "E": -17.765625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-93", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.84375, |
| "scores": { |
| "A": -13.046875, |
| "B": -9.09375, |
| "C": -9.984375, |
| "D": -10.78125, |
| "E": -10.9375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.7421875, |
| "scores": { |
| "A": -8.2421875, |
| "B": -12.109375, |
| "C": -15.703125, |
| "D": -13.6875, |
| "E": -14.984375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-94", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -1.921875, |
| "scores": { |
| "A": -12.7265625, |
| "B": -10.6796875, |
| "C": -10.0390625, |
| "D": -13.703125, |
| "E": -11.9609375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.5859375, |
| "scores": { |
| "A": -5.8046875, |
| "B": -6.28125, |
| "C": -9.3125, |
| "D": -10.296875, |
| "E": -11.390625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-95", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -3.9375, |
| "scores": { |
| "A": -11.21875, |
| "B": -9.1015625, |
| "C": -8.0234375, |
| "D": -12.296875, |
| "E": -11.9609375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.4609375, |
| "scores": { |
| "A": -8.203125, |
| "B": -9.6015625, |
| "C": -8.6171875, |
| "D": -9.53125, |
| "E": -9.6640625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-96", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -4.328125, |
| "scores": { |
| "A": -9.84375, |
| "B": -9.140625, |
| "C": -13.0625, |
| "D": -11.453125, |
| "E": -13.46875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.37890625, |
| "scores": { |
| "A": -7.28515625, |
| "B": -8.671875, |
| "C": -13.0, |
| "D": -10.65625, |
| "E": -11.6640625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-97", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.828125, |
| "scores": { |
| "A": -14.46875, |
| "B": -12.09375, |
| "C": -13.828125, |
| "D": -14.921875, |
| "E": -13.8125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.6328125, |
| "scores": { |
| "A": -10.09375, |
| "B": -10.359375, |
| "C": -13.9140625, |
| "D": -14.7265625, |
| "E": -14.140625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-98", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.0078125, |
| "scores": { |
| "A": -12.9921875, |
| "B": -10.796875, |
| "C": -10.578125, |
| "D": -12.5859375, |
| "E": -11.890625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.44140625, |
| "scores": { |
| "A": -4.04296875, |
| "B": -8.203125, |
| "C": -10.59375, |
| "D": -10.484375, |
| "E": -12.0390625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-99", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.703125, |
| "scores": { |
| "A": -10.875, |
| "B": -9.203125, |
| "C": -9.609375, |
| "D": -11.328125, |
| "E": -10.90625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.5859375, |
| "scores": { |
| "A": -7.1484375, |
| "B": -8.3359375, |
| "C": -12.75, |
| "D": -12.609375, |
| "E": -14.734375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-100", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.1484375, |
| "scores": { |
| "A": -9.265625, |
| "B": -9.7265625, |
| "C": -9.1171875, |
| "D": -10.0546875, |
| "E": -10.6015625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.16015625, |
| "scores": { |
| "A": -4.76171875, |
| "B": -9.390625, |
| "C": -10.921875, |
| "D": -11.46875, |
| "E": -13.1875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-101", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.3359375, |
| "scores": { |
| "A": -10.09375, |
| "B": -10.4296875, |
| "C": -11.984375, |
| "D": -10.6015625, |
| "E": -12.453125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 4.4921875, |
| "scores": { |
| "A": -7.59375, |
| "B": -12.0859375, |
| "C": -13.8125, |
| "D": -12.8125, |
| "E": -14.53125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-102", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -4.4609375, |
| "scores": { |
| "A": -11.3046875, |
| "B": -10.4296875, |
| "C": -10.671875, |
| "D": -14.109375, |
| "E": -14.890625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.984375, |
| "scores": { |
| "A": -6.84375, |
| "B": -10.515625, |
| "C": -13.328125, |
| "D": -13.984375, |
| "E": -14.828125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-103", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1484375, |
| "scores": { |
| "A": -9.734375, |
| "B": -8.5234375, |
| "C": -9.6875, |
| "D": -11.4375, |
| "E": -9.671875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.75, |
| "scores": { |
| "A": -7.84375, |
| "B": -8.59375, |
| "C": -10.53125, |
| "D": -9.78125, |
| "E": -8.421875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-104", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -3.3671875, |
| "scores": { |
| "A": -14.203125, |
| "B": -11.3671875, |
| "C": -11.15625, |
| "D": -14.5234375, |
| "E": -15.984375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.8984375, |
| "scores": { |
| "A": -8.5859375, |
| "B": -10.609375, |
| "C": -11.203125, |
| "D": -11.484375, |
| "E": -10.953125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-105", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 3.03125, |
| "scores": { |
| "A": -11.5, |
| "B": -12.0234375, |
| "C": -8.46875, |
| "D": -13.9765625, |
| "E": -13.28125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.12109375, |
| "scores": { |
| "A": -7.45703125, |
| "B": -9.78125, |
| "C": -8.578125, |
| "D": -13.375, |
| "E": -13.59375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-106", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.9296875, |
| "scores": { |
| "A": -10.109375, |
| "B": -9.1796875, |
| "C": -9.4453125, |
| "D": -13.0234375, |
| "E": -13.1328125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 3.88671875, |
| "scores": { |
| "A": -7.59765625, |
| "B": -11.9453125, |
| "C": -13.4375, |
| "D": -11.484375, |
| "E": -14.0703125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-107", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.015625, |
| "scores": { |
| "A": -9.90625, |
| "B": -8.890625, |
| "C": -11.015625, |
| "D": -12.1875, |
| "E": -10.1484375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 3.1640625, |
| "scores": { |
| "A": -7.0546875, |
| "B": -10.21875, |
| "C": -11.671875, |
| "D": -10.859375, |
| "E": -11.921875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-108", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.65625, |
| "scores": { |
| "A": -12.28125, |
| "B": -9.625, |
| "C": -12.828125, |
| "D": -13.28125, |
| "E": -14.59375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.875, |
| "scores": { |
| "A": -6.484375, |
| "B": -8.359375, |
| "C": -11.03125, |
| "D": -9.9921875, |
| "E": -11.953125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-109", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -0.8203125, |
| "scores": { |
| "A": -10.0546875, |
| "B": -11.3125, |
| "C": -10.7734375, |
| "D": -12.453125, |
| "E": -9.953125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.171875, |
| "scores": { |
| "A": -8.9375, |
| "B": -13.0625, |
| "C": -15.109375, |
| "D": -13.6875, |
| "E": -14.53125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-110", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.28125, |
| "scores": { |
| "A": -12.046875, |
| "B": -8.4375, |
| "C": -11.71875, |
| "D": -12.578125, |
| "E": -15.1328125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.609375, |
| "scores": { |
| "A": -5.6796875, |
| "B": -9.0703125, |
| "C": -11.2890625, |
| "D": -11.3984375, |
| "E": -13.359375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-111", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.203125, |
| "scores": { |
| "A": -9.796875, |
| "B": -9.2734375, |
| "C": -9.4765625, |
| "D": -10.7578125, |
| "E": -11.4296875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.6328125, |
| "scores": { |
| "A": -8.4140625, |
| "B": -12.046875, |
| "C": -12.484375, |
| "D": -13.3125, |
| "E": -14.53125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-112", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -5.109375, |
| "scores": { |
| "A": -16.6875, |
| "B": -16.21875, |
| "C": -11.578125, |
| "D": -13.8203125, |
| "E": -15.546875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.0859375, |
| "scores": { |
| "A": -11.0078125, |
| "B": -13.3203125, |
| "C": -11.015625, |
| "D": -10.921875, |
| "E": -15.7734375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-113", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.359375, |
| "scores": { |
| "A": -10.40625, |
| "B": -9.71875, |
| "C": -10.078125, |
| "D": -12.84375, |
| "E": -11.2578125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.25390625, |
| "scores": { |
| "A": -5.61328125, |
| "B": -11.015625, |
| "C": -12.8671875, |
| "D": -11.5625, |
| "E": -13.203125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-114", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -0.203125, |
| "scores": { |
| "A": -12.5234375, |
| "B": -11.9609375, |
| "C": -12.390625, |
| "D": -12.65625, |
| "E": -11.7578125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.890625, |
| "scores": { |
| "A": -5.28125, |
| "B": -10.171875, |
| "C": -12.328125, |
| "D": -10.734375, |
| "E": -12.53125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-115", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.28125, |
| "scores": { |
| "A": -10.9375, |
| "B": -11.484375, |
| "C": -11.296875, |
| "D": -12.328125, |
| "E": -12.21875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.3515625, |
| "scores": { |
| "A": -9.8984375, |
| "B": -12.703125, |
| "C": -13.203125, |
| "D": -11.328125, |
| "E": -11.25 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-116", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.7109375, |
| "scores": { |
| "A": -12.0390625, |
| "B": -9.7421875, |
| "C": -11.453125, |
| "D": -11.5390625, |
| "E": -11.8203125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.51171875, |
| "scores": { |
| "A": -7.28125, |
| "B": -7.4921875, |
| "C": -8.8203125, |
| "D": -6.98046875, |
| "E": -7.3046875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-117", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.203125, |
| "scores": { |
| "A": -11.703125, |
| "B": -9.125, |
| "C": -9.078125, |
| "D": -11.03125, |
| "E": -11.28125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.0234375, |
| "scores": { |
| "A": -6.42578125, |
| "B": -6.91015625, |
| "C": -7.69921875, |
| "D": -8.4375, |
| "E": -7.44921875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-118", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.34375, |
| "scores": { |
| "A": -12.6953125, |
| "B": -14.203125, |
| "C": -13.0390625, |
| "D": -14.6640625, |
| "E": -13.8125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.234375, |
| "scores": { |
| "A": -7.921875, |
| "B": -10.390625, |
| "C": -10.15625, |
| "D": -11.53125, |
| "E": -11.34375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-119", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.921875, |
| "scores": { |
| "A": -12.75, |
| "B": -11.296875, |
| "C": -13.4296875, |
| "D": -13.21875, |
| "E": -12.75 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.125, |
| "scores": { |
| "A": -6.8046875, |
| "B": -7.6171875, |
| "C": -10.0625, |
| "D": -10.9296875, |
| "E": -11.828125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-120", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.3515625, |
| "scores": { |
| "A": -12.625, |
| "B": -10.171875, |
| "C": -10.5234375, |
| "D": -11.96875, |
| "E": -12.625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.84375, |
| "scores": { |
| "A": -7.4375, |
| "B": -12.28125, |
| "C": -11.75, |
| "D": -11.984375, |
| "E": -13.75 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-121", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.6875, |
| "scores": { |
| "A": -12.203125, |
| "B": -10.84375, |
| "C": -11.015625, |
| "D": -13.53125, |
| "E": -13.328125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.828125, |
| "scores": { |
| "A": -7.1875, |
| "B": -9.34375, |
| "C": -10.5703125, |
| "D": -12.015625, |
| "E": -11.6640625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-122", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.671875, |
| "scores": { |
| "A": -11.09375, |
| "B": -10.421875, |
| "C": -13.25, |
| "D": -13.296875, |
| "E": -13.5 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.328125, |
| "scores": { |
| "A": -6.90625, |
| "B": -9.234375, |
| "C": -12.421875, |
| "D": -11.0, |
| "E": -12.1796875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-123", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.9296875, |
| "scores": { |
| "A": -12.8125, |
| "B": -13.265625, |
| "C": -10.09375, |
| "D": -12.0234375, |
| "E": -12.84375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.0625, |
| "scores": { |
| "A": -7.71875, |
| "B": -10.9296875, |
| "C": -11.78125, |
| "D": -11.5546875, |
| "E": -13.9375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-124", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.96875, |
| "scores": { |
| "A": -10.953125, |
| "B": -9.9296875, |
| "C": -10.8984375, |
| "D": -11.1875, |
| "E": -11.4375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.4140625, |
| "scores": { |
| "A": -9.1328125, |
| "B": -11.734375, |
| "C": -11.546875, |
| "D": -11.625, |
| "E": -11.328125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-125", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.25, |
| "scores": { |
| "A": -12.8984375, |
| "B": -12.015625, |
| "C": -10.3671875, |
| "D": -10.6171875, |
| "E": -11.515625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.16796875, |
| "scores": { |
| "A": -6.90234375, |
| "B": -8.6875, |
| "C": -10.0703125, |
| "D": -9.84375, |
| "E": -10.1640625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-126", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.546875, |
| "scores": { |
| "A": -11.015625, |
| "B": -8.3515625, |
| "C": -8.8984375, |
| "D": -9.953125, |
| "E": -10.5 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.8671875, |
| "scores": { |
| "A": -5.8046875, |
| "B": -10.6484375, |
| "C": -11.671875, |
| "D": -11.6796875, |
| "E": -13.015625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-127", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.0625, |
| "scores": { |
| "A": -10.5546875, |
| "B": -10.7734375, |
| "C": -10.859375, |
| "D": -10.4921875, |
| "E": -11.0703125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.21875, |
| "scores": { |
| "A": -6.5, |
| "B": -8.7109375, |
| "C": -9.375, |
| "D": -7.71875, |
| "E": -8.375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-128", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.0078125, |
| "scores": { |
| "A": -10.7890625, |
| "B": -10.34375, |
| "C": -10.8359375, |
| "D": -13.4609375, |
| "E": -12.3515625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.97265625, |
| "scores": { |
| "A": -5.43359375, |
| "B": -7.93359375, |
| "C": -9.234375, |
| "D": -11.1875, |
| "E": -10.40625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-129", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.609375, |
| "scores": { |
| "A": -8.3125, |
| "B": -10.671875, |
| "C": -9.640625, |
| "D": -10.921875, |
| "E": -9.640625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.0390625, |
| "scores": { |
| "A": -6.0390625, |
| "B": -8.234375, |
| "C": -7.6875, |
| "D": -8.078125, |
| "E": -8.28125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-130", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.5625, |
| "scores": { |
| "A": -12.53125, |
| "B": -11.625, |
| "C": -14.40625, |
| "D": -11.0625, |
| "E": -12.203125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.62890625, |
| "scores": { |
| "A": -7.70703125, |
| "B": -7.73046875, |
| "C": -10.4296875, |
| "D": -8.3359375, |
| "E": -8.21875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-131", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.3046875, |
| "scores": { |
| "A": -14.7265625, |
| "B": -9.6171875, |
| "C": -12.921875, |
| "D": -13.8828125, |
| "E": -14.4765625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.75, |
| "scores": { |
| "A": -9.296875, |
| "B": -8.546875, |
| "C": -11.984375, |
| "D": -11.671875, |
| "E": -12.25 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-132", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.1640625, |
| "scores": { |
| "A": -12.734375, |
| "B": -9.8671875, |
| "C": -11.4375, |
| "D": -11.078125, |
| "E": -13.03125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.515625, |
| "scores": { |
| "A": -9.109375, |
| "B": -9.109375, |
| "C": -10.546875, |
| "D": -10.34375, |
| "E": -10.625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-133", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -4.1015625, |
| "scores": { |
| "A": -14.859375, |
| "B": -16.15625, |
| "C": -10.7578125, |
| "D": -14.6015625, |
| "E": -13.9765625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 3.8203125, |
| "scores": { |
| "A": -8.59375, |
| "B": -13.015625, |
| "C": -12.4140625, |
| "D": -14.5625, |
| "E": -14.453125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-134", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.46875, |
| "scores": { |
| "A": -10.7109375, |
| "B": -11.8984375, |
| "C": -13.1484375, |
| "D": -13.1796875, |
| "E": -13.21875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.0, |
| "scores": { |
| "A": -7.7265625, |
| "B": -10.8125, |
| "C": -11.3671875, |
| "D": -12.7265625, |
| "E": -14.625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-135", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.515625, |
| "scores": { |
| "A": -13.609375, |
| "B": -10.125, |
| "C": -10.8984375, |
| "D": -13.640625, |
| "E": -13.96875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.02734375, |
| "scores": { |
| "A": -7.17578125, |
| "B": -8.53125, |
| "C": -9.078125, |
| "D": -9.203125, |
| "E": -10.515625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-136", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.6875, |
| "scores": { |
| "A": -12.5546875, |
| "B": -11.8671875, |
| "C": -11.921875, |
| "D": -14.296875, |
| "E": -14.3984375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.875, |
| "scores": { |
| "A": -10.15625, |
| "B": -12.46875, |
| "C": -12.46875, |
| "D": -12.03125, |
| "E": -13.0703125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-137", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.421875, |
| "scores": { |
| "A": -10.4375, |
| "B": -10.5078125, |
| "C": -10.0859375, |
| "D": -12.4375, |
| "E": -12.859375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.3125, |
| "scores": { |
| "A": -9.8125, |
| "B": -13.125, |
| "C": -11.859375, |
| "D": -12.8671875, |
| "E": -13.4140625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-138", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.1328125, |
| "scores": { |
| "A": -11.515625, |
| "B": -11.3828125, |
| "C": -13.8515625, |
| "D": -13.59375, |
| "E": -12.3359375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 2.3984375, |
| "scores": { |
| "A": -9.328125, |
| "B": -11.9453125, |
| "C": -13.375, |
| "D": -12.328125, |
| "E": -11.7265625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-139", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -4.9921875, |
| "scores": { |
| "A": -11.2421875, |
| "B": -9.6640625, |
| "C": -11.1796875, |
| "D": -12.578125, |
| "E": -14.65625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.05859375, |
| "scores": { |
| "A": -7.59765625, |
| "B": -9.390625, |
| "C": -10.296875, |
| "D": -9.796875, |
| "E": -11.65625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-140", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.09375, |
| "scores": { |
| "A": -12.171875, |
| "B": -10.953125, |
| "C": -12.484375, |
| "D": -12.046875, |
| "E": -12.828125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.09375, |
| "scores": { |
| "A": -8.640625, |
| "B": -9.734375, |
| "C": -10.828125, |
| "D": -10.234375, |
| "E": -11.625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-141", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.390625, |
| "scores": { |
| "A": -15.65625, |
| "B": -14.0, |
| "C": -12.3359375, |
| "D": -12.7265625, |
| "E": -13.421875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -1.5078125, |
| "scores": { |
| "A": -9.03125, |
| "B": -10.1640625, |
| "C": -9.59375, |
| "D": -8.109375, |
| "E": -8.0859375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-142", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.0390625, |
| "scores": { |
| "A": -12.0546875, |
| "B": -12.09375, |
| "C": -12.2421875, |
| "D": -13.71875, |
| "E": -13.640625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 2.3515625, |
| "scores": { |
| "A": -10.3203125, |
| "B": -13.515625, |
| "C": -12.671875, |
| "D": -13.125, |
| "E": -14.1875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-143", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.2109375, |
| "scores": { |
| "A": -10.8515625, |
| "B": -9.3515625, |
| "C": -10.453125, |
| "D": -10.5625, |
| "E": -10.328125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.875, |
| "scores": { |
| "A": -8.78125, |
| "B": -11.7734375, |
| "C": -12.0390625, |
| "D": -10.65625, |
| "E": -11.0703125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-144", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.5234375, |
| "scores": { |
| "A": -14.15625, |
| "B": -13.6328125, |
| "C": -16.546875, |
| "D": -16.75, |
| "E": -16.03125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 7.6953125, |
| "scores": { |
| "A": -6.4453125, |
| "B": -14.140625, |
| "C": -17.203125, |
| "D": -14.1953125, |
| "E": -15.3984375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-145", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -0.125, |
| "scores": { |
| "A": -13.59375, |
| "B": -11.84375, |
| "C": -14.28125, |
| "D": -13.25, |
| "E": -11.71875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.28125, |
| "scores": { |
| "A": -8.40625, |
| "B": -11.6875, |
| "C": -15.328125, |
| "D": -13.34375, |
| "E": -12.859375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-146", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -1.6953125, |
| "scores": { |
| "A": -9.765625, |
| "B": -11.140625, |
| "C": -9.4453125, |
| "D": -10.703125, |
| "E": -9.9375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.390625, |
| "scores": { |
| "A": -8.3828125, |
| "B": -10.7734375, |
| "C": -14.0390625, |
| "D": -13.5546875, |
| "E": -13.0390625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-147", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.1484375, |
| "scores": { |
| "A": -9.875, |
| "B": -10.65625, |
| "C": -11.03125, |
| "D": -12.515625, |
| "E": -10.0234375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.203125, |
| "scores": { |
| "A": -6.390625, |
| "B": -8.140625, |
| "C": -9.0703125, |
| "D": -10.21875, |
| "E": -8.59375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-148", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.09375, |
| "scores": { |
| "A": -11.84375, |
| "B": -8.6875, |
| "C": -9.390625, |
| "D": -8.59375, |
| "E": -10.328125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.2890625, |
| "scores": { |
| "A": -8.21875, |
| "B": -9.21875, |
| "C": -9.4140625, |
| "D": -8.5078125, |
| "E": -9.5859375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-149", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.984375, |
| "scores": { |
| "A": -11.984375, |
| "B": -10.328125, |
| "C": -11.265625, |
| "D": -11.3125, |
| "E": -11.625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.5078125, |
| "scores": { |
| "A": -7.390625, |
| "B": -10.25, |
| "C": -13.9375, |
| "D": -13.8984375, |
| "E": -15.1875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-150", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.9140625, |
| "scores": { |
| "A": -12.328125, |
| "B": -9.484375, |
| "C": -10.9921875, |
| "D": -12.3984375, |
| "E": -12.6328125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.38671875, |
| "scores": { |
| "A": -7.01953125, |
| "B": -12.03125, |
| "C": -12.453125, |
| "D": -12.40625, |
| "E": -13.796875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-151", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -2.625, |
| "scores": { |
| "A": -15.140625, |
| "B": -13.625, |
| "C": -14.5625, |
| "D": -15.296875, |
| "E": -11.9375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.1484375, |
| "scores": { |
| "A": -10.265625, |
| "B": -11.4375, |
| "C": -13.4140625, |
| "D": -11.515625, |
| "E": -12.484375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-152", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.65625, |
| "scores": { |
| "A": -12.15625, |
| "B": -11.09375, |
| "C": -11.75, |
| "D": -11.765625, |
| "E": -11.75 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.6015625, |
| "scores": { |
| "A": -8.2734375, |
| "B": -12.875, |
| "C": -16.15625, |
| "D": -13.109375, |
| "E": -13.90625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-153", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.46875, |
| "scores": { |
| "A": -10.4609375, |
| "B": -10.9296875, |
| "C": -12.1640625, |
| "D": -15.109375, |
| "E": -13.3125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 3.375, |
| "scores": { |
| "A": -5.125, |
| "B": -8.75, |
| "C": -8.5, |
| "D": -12.921875, |
| "E": -12.109375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-154", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.9921875, |
| "scores": { |
| "A": -10.4296875, |
| "B": -11.890625, |
| "C": -11.7890625, |
| "D": -15.421875, |
| "E": -13.53125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.6875, |
| "scores": { |
| "A": -6.578125, |
| "B": -9.6484375, |
| "C": -11.0078125, |
| "D": -11.265625, |
| "E": -11.0859375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-155", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -3.25, |
| "scores": { |
| "A": -11.7578125, |
| "B": -10.15625, |
| "C": -8.5078125, |
| "D": -12.3046875, |
| "E": -12.296875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.6953125, |
| "scores": { |
| "A": -6.5625, |
| "B": -9.1015625, |
| "C": -8.2578125, |
| "D": -9.484375, |
| "E": -8.8671875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-156", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.546875, |
| "scores": { |
| "A": -11.046875, |
| "B": -9.5, |
| "C": -10.5390625, |
| "D": -10.7265625, |
| "E": -10.1328125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.546875, |
| "scores": { |
| "A": -8.578125, |
| "B": -9.53125, |
| "C": -10.578125, |
| "D": -9.21875, |
| "E": -9.125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-157", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.28125, |
| "scores": { |
| "A": -10.46875, |
| "B": -8.765625, |
| "C": -9.046875, |
| "D": -9.90625, |
| "E": -10.0625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -1.28125, |
| "scores": { |
| "A": -7.140625, |
| "B": -8.390625, |
| "C": -8.2734375, |
| "D": -7.359375, |
| "E": -6.9921875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-158", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.6171875, |
| "scores": { |
| "A": -11.875, |
| "B": -9.8671875, |
| "C": -12.375, |
| "D": -12.484375, |
| "E": -12.4375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.828125, |
| "scores": { |
| "A": -7.9453125, |
| "B": -10.96875, |
| "C": -9.1328125, |
| "D": -8.7734375, |
| "E": -9.625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-159", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.203125, |
| "scores": { |
| "A": -14.390625, |
| "B": -10.96875, |
| "C": -12.171875, |
| "D": -14.21875, |
| "E": -13.125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -1.5625, |
| "scores": { |
| "A": -10.09375, |
| "B": -10.09375, |
| "C": -11.546875, |
| "D": -9.984375, |
| "E": -10.1171875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-160", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.859375, |
| "scores": { |
| "A": -11.5859375, |
| "B": -14.2109375, |
| "C": -14.4609375, |
| "D": -14.4453125, |
| "E": -14.359375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.6953125, |
| "scores": { |
| "A": -7.6484375, |
| "B": -11.265625, |
| "C": -12.71875, |
| "D": -12.34375, |
| "E": -12.890625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-161", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.1953125, |
| "scores": { |
| "A": -14.890625, |
| "B": -10.3125, |
| "C": -12.5078125, |
| "D": -12.6953125, |
| "E": -15.1796875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.21875, |
| "scores": { |
| "A": -6.8515625, |
| "B": -7.0078125, |
| "C": -9.0703125, |
| "D": -9.6484375, |
| "E": -9.453125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-162", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.234375, |
| "scores": { |
| "A": -10.1484375, |
| "B": -10.5, |
| "C": -12.3828125, |
| "D": -11.859375, |
| "E": -11.9921875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.578125, |
| "scores": { |
| "A": -5.546875, |
| "B": -10.59375, |
| "C": -10.125, |
| "D": -10.140625, |
| "E": -11.59375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-163", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.34375, |
| "scores": { |
| "A": -10.15625, |
| "B": -10.65625, |
| "C": -11.5, |
| "D": -14.0625, |
| "E": -11.96875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.609375, |
| "scores": { |
| "A": -7.6796875, |
| "B": -11.90625, |
| "C": -12.2890625, |
| "D": -12.3671875, |
| "E": -10.6640625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-164", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.0234375, |
| "scores": { |
| "A": -11.75, |
| "B": -11.3671875, |
| "C": -12.578125, |
| "D": -14.390625, |
| "E": -13.609375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.5, |
| "scores": { |
| "A": -10.015625, |
| "B": -13.3125, |
| "C": -14.359375, |
| "D": -12.515625, |
| "E": -13.640625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-165", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.390625, |
| "scores": { |
| "A": -13.828125, |
| "B": -11.390625, |
| "C": -14.1875, |
| "D": -12.78125, |
| "E": -14.859375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.03125, |
| "scores": { |
| "A": -7.125, |
| "B": -8.75, |
| "C": -13.1328125, |
| "D": -13.15625, |
| "E": -14.8671875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-166", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.734375, |
| "scores": { |
| "A": -13.2265625, |
| "B": -8.90625, |
| "C": -9.71875, |
| "D": -10.8046875, |
| "E": -11.640625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.4296875, |
| "scores": { |
| "A": -6.2421875, |
| "B": -8.828125, |
| "C": -9.09375, |
| "D": -9.59375, |
| "E": -11.671875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-167", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 2.28125, |
| "scores": { |
| "A": -13.3125, |
| "B": -10.640625, |
| "C": -12.921875, |
| "D": -16.09375, |
| "E": -14.75 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.328125, |
| "scores": { |
| "A": -8.015625, |
| "B": -11.34375, |
| "C": -15.125, |
| "D": -13.3125, |
| "E": -14.375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-168", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.59375, |
| "scores": { |
| "A": -11.9375, |
| "B": -12.078125, |
| "C": -12.59375, |
| "D": -13.53125, |
| "E": -13.71875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.8125, |
| "scores": { |
| "A": -8.265625, |
| "B": -12.84375, |
| "C": -14.609375, |
| "D": -13.078125, |
| "E": -14.4375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-169", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.265625, |
| "scores": { |
| "A": -11.6953125, |
| "B": -9.4296875, |
| "C": -11.6328125, |
| "D": -13.53125, |
| "E": -12.4609375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.140625, |
| "scores": { |
| "A": -9.15625, |
| "B": -10.296875, |
| "C": -10.46875, |
| "D": -11.28125, |
| "E": -11.203125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-170", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.6953125, |
| "scores": { |
| "A": -12.6796875, |
| "B": -13.546875, |
| "C": -15.734375, |
| "D": -14.1875, |
| "E": -14.375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.3984375, |
| "scores": { |
| "A": -7.4453125, |
| "B": -11.609375, |
| "C": -12.671875, |
| "D": -10.328125, |
| "E": -10.84375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-171", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.3203125, |
| "scores": { |
| "A": -12.734375, |
| "B": -11.4140625, |
| "C": -12.1015625, |
| "D": -14.3125, |
| "E": -13.2265625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 3.89453125, |
| "scores": { |
| "A": -7.27734375, |
| "B": -11.171875, |
| "C": -14.109375, |
| "D": -13.875, |
| "E": -14.140625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-172", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.5546875, |
| "scores": { |
| "A": -11.8984375, |
| "B": -9.34375, |
| "C": -9.6796875, |
| "D": -12.5859375, |
| "E": -11.3515625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.265625, |
| "scores": { |
| "A": -7.75, |
| "B": -8.015625, |
| "C": -8.984375, |
| "D": -10.234375, |
| "E": -8.7734375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-173", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.578125, |
| "scores": { |
| "A": -9.578125, |
| "B": -13.25, |
| "C": -13.359375, |
| "D": -14.15625, |
| "E": -12.0625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.5, |
| "scores": { |
| "A": -7.09375, |
| "B": -12.1875, |
| "C": -14.15625, |
| "D": -14.59375, |
| "E": -14.734375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-174", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.25, |
| "scores": { |
| "A": -14.1875, |
| "B": -13.484375, |
| "C": -14.6875, |
| "D": -13.5625, |
| "E": -13.734375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.984375, |
| "scores": { |
| "A": -8.5, |
| "B": -11.28125, |
| "C": -14.546875, |
| "D": -12.5625, |
| "E": -14.484375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-175", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.3828125, |
| "scores": { |
| "A": -12.578125, |
| "B": -11.2734375, |
| "C": -12.71875, |
| "D": -13.6171875, |
| "E": -12.65625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.2109375, |
| "scores": { |
| "A": -7.8359375, |
| "B": -12.359375, |
| "C": -15.453125, |
| "D": -11.6484375, |
| "E": -14.046875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-176", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.796875, |
| "scores": { |
| "A": -10.96875, |
| "B": -10.25, |
| "C": -13.046875, |
| "D": -13.21875, |
| "E": -11.5625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.046875, |
| "scores": { |
| "A": -8.546875, |
| "B": -10.9375, |
| "C": -12.59375, |
| "D": -10.203125, |
| "E": -10.328125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-177", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.09375, |
| "scores": { |
| "A": -9.609375, |
| "B": -9.890625, |
| "C": -13.703125, |
| "D": -11.875, |
| "E": -11.5 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.8359375, |
| "scores": { |
| "A": -5.1953125, |
| "B": -7.3671875, |
| "C": -12.03125, |
| "D": -9.5625, |
| "E": -11.109375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-178", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.4609375, |
| "scores": { |
| "A": -13.1953125, |
| "B": -11.1015625, |
| "C": -12.8203125, |
| "D": -12.625, |
| "E": -10.640625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.72265625, |
| "scores": { |
| "A": -5.77734375, |
| "B": -10.375, |
| "C": -8.5, |
| "D": -10.84375, |
| "E": -12.5 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-179", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.5859375, |
| "scores": { |
| "A": -9.1640625, |
| "B": -10.75, |
| "C": -11.7734375, |
| "D": -12.671875, |
| "E": -12.8046875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.15625, |
| "scores": { |
| "A": -5.9375, |
| "B": -8.09375, |
| "C": -10.34375, |
| "D": -10.609375, |
| "E": -12.296875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-180", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.5078125, |
| "scores": { |
| "A": -11.15625, |
| "B": -9.8515625, |
| "C": -12.359375, |
| "D": -13.1796875, |
| "E": -12.3828125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.53125, |
| "scores": { |
| "A": -10.609375, |
| "B": -11.90625, |
| "C": -12.140625, |
| "D": -11.0, |
| "E": -11.59375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-181", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7734375, |
| "scores": { |
| "A": -9.6328125, |
| "B": -8.859375, |
| "C": -11.828125, |
| "D": -11.640625, |
| "E": -10.6875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.5078125, |
| "scores": { |
| "A": -7.5546875, |
| "B": -9.0625, |
| "C": -10.4453125, |
| "D": -9.140625, |
| "E": -9.0078125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-182", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.8203125, |
| "scores": { |
| "A": -11.703125, |
| "B": -15.640625, |
| "C": -15.5234375, |
| "D": -15.2578125, |
| "E": -15.8046875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.4765625, |
| "scores": { |
| "A": -7.3046875, |
| "B": -11.609375, |
| "C": -13.78125, |
| "D": -14.828125, |
| "E": -15.296875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-183", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.40625, |
| "scores": { |
| "A": -12.6484375, |
| "B": -9.3671875, |
| "C": -10.7734375, |
| "D": -13.140625, |
| "E": -13.125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.71875, |
| "scores": { |
| "A": -8.6640625, |
| "B": -7.5078125, |
| "C": -6.7890625, |
| "D": -9.3046875, |
| "E": -10.7578125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-184", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.1875, |
| "scores": { |
| "A": -11.359375, |
| "B": -9.9375, |
| "C": -11.1875, |
| "D": -13.125, |
| "E": -12.328125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.0703125, |
| "scores": { |
| "A": -8.7109375, |
| "B": -11.3828125, |
| "C": -11.5, |
| "D": -10.78125, |
| "E": -11.2578125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-185", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.1796875, |
| "scores": { |
| "A": -11.375, |
| "B": -9.40625, |
| "C": -12.5859375, |
| "D": -12.453125, |
| "E": -12.6484375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -8.36328125, |
| "scores": { |
| "A": -5.95703125, |
| "B": -12.796875, |
| "C": -14.3203125, |
| "D": -13.2578125, |
| "E": -14.75 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-186", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.40625, |
| "scores": { |
| "A": -11.265625, |
| "B": -11.671875, |
| "C": -14.078125, |
| "D": -14.6875, |
| "E": -13.640625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 2.421875, |
| "scores": { |
| "A": -7.59375, |
| "B": -10.015625, |
| "C": -13.9375, |
| "D": -12.765625, |
| "E": -12.796875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-187", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.078125, |
| "scores": { |
| "A": -12.546875, |
| "B": -11.0, |
| "C": -13.078125, |
| "D": -14.6171875, |
| "E": -14.2265625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.29296875, |
| "scores": { |
| "A": -7.48828125, |
| "B": -9.2890625, |
| "C": -10.78125, |
| "D": -11.15625, |
| "E": -11.203125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-188", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.125, |
| "scores": { |
| "A": -12.140625, |
| "B": -13.03125, |
| "C": -13.734375, |
| "D": -14.09375, |
| "E": -15.265625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.21875, |
| "scores": { |
| "A": -11.125, |
| "B": -13.6875, |
| "C": -16.390625, |
| "D": -12.75, |
| "E": -16.34375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-189", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2109375, |
| "scores": { |
| "A": -14.5390625, |
| "B": -11.546875, |
| "C": -11.8046875, |
| "D": -11.7578125, |
| "E": -13.34375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.84375, |
| "scores": { |
| "A": -9.875, |
| "B": -10.3828125, |
| "C": -10.28125, |
| "D": -9.5390625, |
| "E": -9.5625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-190", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.34375, |
| "scores": { |
| "A": -13.6015625, |
| "B": -10.7734375, |
| "C": -10.4296875, |
| "D": -13.3671875, |
| "E": -13.9296875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.5, |
| "scores": { |
| "A": -6.3515625, |
| "B": -9.2578125, |
| "C": -12.8515625, |
| "D": -10.84375, |
| "E": -12.953125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-191", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953125, |
| "scores": { |
| "A": -11.890625, |
| "B": -10.9375, |
| "C": -13.640625, |
| "D": -14.109375, |
| "E": -13.765625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.09375, |
| "scores": { |
| "A": -6.4375, |
| "B": -8.53125, |
| "C": -11.7734375, |
| "D": -11.28125, |
| "E": -12.3203125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-192", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.125, |
| "scores": { |
| "A": -13.6953125, |
| "B": -13.671875, |
| "C": -13.5703125, |
| "D": -14.453125, |
| "E": -13.6171875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.6875, |
| "scores": { |
| "A": -8.875, |
| "B": -12.59375, |
| "C": -13.15625, |
| "D": -11.125, |
| "E": -10.5625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-193", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.7734375, |
| "scores": { |
| "A": -11.515625, |
| "B": -8.7421875, |
| "C": -9.84375, |
| "D": -10.28125, |
| "E": -10.03125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.2734375, |
| "scores": { |
| "A": -8.5703125, |
| "B": -10.296875, |
| "C": -10.296875, |
| "D": -9.84375, |
| "E": -9.953125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-194", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.15625, |
| "scores": { |
| "A": -9.4609375, |
| "B": -9.4140625, |
| "C": -10.640625, |
| "D": -11.5703125, |
| "E": -12.359375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.71484375, |
| "scores": { |
| "A": -7.16796875, |
| "B": -12.7421875, |
| "C": -14.03125, |
| "D": -13.8828125, |
| "E": -15.5 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-195", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -4.0625, |
| "scores": { |
| "A": -11.25, |
| "B": -8.28125, |
| "C": -10.234375, |
| "D": -12.34375, |
| "E": -12.640625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.2109375, |
| "scores": { |
| "A": -8.0546875, |
| "B": -9.1796875, |
| "C": -9.2890625, |
| "D": -10.265625, |
| "E": -10.5703125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-196", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.8359375, |
| "scores": { |
| "A": -12.8125, |
| "B": -10.9765625, |
| "C": -11.9453125, |
| "D": -12.734375, |
| "E": -14.8203125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.40625, |
| "scores": { |
| "A": -9.046875, |
| "B": -11.1953125, |
| "C": -11.7734375, |
| "D": -10.453125, |
| "E": -12.75 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-197", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.6953125, |
| "scores": { |
| "A": -10.984375, |
| "B": -8.9375, |
| "C": -9.6328125, |
| "D": -9.9453125, |
| "E": -10.46875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.484375, |
| "scores": { |
| "A": -8.5390625, |
| "B": -10.0703125, |
| "C": -12.0234375, |
| "D": -8.8046875, |
| "E": -9.2734375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-198", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.421875, |
| "scores": { |
| "A": -11.6015625, |
| "B": -9.5, |
| "C": -9.921875, |
| "D": -13.40625, |
| "E": -11.8359375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.078125, |
| "scores": { |
| "A": -9.3125, |
| "B": -11.125, |
| "C": -9.390625, |
| "D": -11.015625, |
| "E": -11.4375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-199", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.21875, |
| "scores": { |
| "A": -10.2578125, |
| "B": -14.28125, |
| "C": -15.4765625, |
| "D": -16.046875, |
| "E": -13.0859375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.1484375, |
| "scores": { |
| "A": -9.6484375, |
| "B": -12.765625, |
| "C": -14.796875, |
| "D": -14.5625, |
| "E": -15.265625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-200", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -0.28125, |
| "scores": { |
| "A": -12.2578125, |
| "B": -11.9921875, |
| "C": -13.625, |
| "D": -12.015625, |
| "E": -11.7109375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.265625, |
| "scores": { |
| "A": -7.9140625, |
| "B": -11.1796875, |
| "C": -13.5546875, |
| "D": -9.984375, |
| "E": -9.2578125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-201", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.5546875, |
| "scores": { |
| "A": -10.8125, |
| "B": -10.2578125, |
| "C": -10.46875, |
| "D": -11.625, |
| "E": -12.4375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.890625, |
| "scores": { |
| "A": -9.453125, |
| "B": -11.8671875, |
| "C": -13.890625, |
| "D": -11.34375, |
| "E": -14.578125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-202", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -1.6875, |
| "scores": { |
| "A": -11.5625, |
| "B": -11.890625, |
| "C": -10.765625, |
| "D": -11.921875, |
| "E": -10.203125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.4921875, |
| "scores": { |
| "A": -6.8671875, |
| "B": -10.359375, |
| "C": -13.8203125, |
| "D": -13.0078125, |
| "E": -13.609375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-203", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.9140625, |
| "scores": { |
| "A": -9.90625, |
| "B": -9.8125, |
| "C": -8.4921875, |
| "D": -11.40625, |
| "E": -10.0 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.9296875, |
| "scores": { |
| "A": -8.015625, |
| "B": -9.7578125, |
| "C": -10.0546875, |
| "D": -8.9453125, |
| "E": -10.2265625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-204", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.0625, |
| "scores": { |
| "A": -9.65625, |
| "B": -9.9765625, |
| "C": -9.71875, |
| "D": -11.1640625, |
| "E": -10.765625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.6171875, |
| "scores": { |
| "A": -5.9921875, |
| "B": -10.0, |
| "C": -10.609375, |
| "D": -11.453125, |
| "E": -11.1875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-205", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.75, |
| "scores": { |
| "A": -9.7109375, |
| "B": -8.2734375, |
| "C": -10.3828125, |
| "D": -11.78125, |
| "E": -11.0234375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.7421875, |
| "scores": { |
| "A": -7.8359375, |
| "B": -9.9921875, |
| "C": -11.4921875, |
| "D": -11.3125, |
| "E": -12.578125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-206", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.1796875, |
| "scores": { |
| "A": -11.6171875, |
| "B": -10.96875, |
| "C": -11.1484375, |
| "D": -12.84375, |
| "E": -14.0 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.03125, |
| "scores": { |
| "A": -8.0, |
| "B": -11.03125, |
| "C": -12.328125, |
| "D": -11.765625, |
| "E": -14.171875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-207", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -3.328125, |
| "scores": { |
| "A": -12.921875, |
| "B": -13.3125, |
| "C": -10.84375, |
| "D": -15.78125, |
| "E": -14.171875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -8.30078125, |
| "scores": { |
| "A": -6.63671875, |
| "B": -11.015625, |
| "C": -12.171875, |
| "D": -15.0703125, |
| "E": -14.9375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-208", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.8125, |
| "scores": { |
| "A": -12.0625, |
| "B": -10.109375, |
| "C": -11.71875, |
| "D": -10.921875, |
| "E": -13.046875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.703125, |
| "scores": { |
| "A": -9.796875, |
| "B": -11.140625, |
| "C": -13.078125, |
| "D": -11.5, |
| "E": -12.078125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-209", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.9453125, |
| "scores": { |
| "A": -10.890625, |
| "B": -9.78125, |
| "C": -13.171875, |
| "D": -10.203125, |
| "E": -8.8359375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.015625, |
| "scores": { |
| "A": -6.84375, |
| "B": -8.828125, |
| "C": -11.0234375, |
| "D": -7.9375, |
| "E": -6.828125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-210", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.546875, |
| "scores": { |
| "A": -10.8125, |
| "B": -10.8359375, |
| "C": -10.90625, |
| "D": -12.609375, |
| "E": -11.359375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.6875, |
| "scores": { |
| "A": -8.609375, |
| "B": -10.140625, |
| "C": -9.984375, |
| "D": -9.515625, |
| "E": -9.296875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-211", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.5234375, |
| "scores": { |
| "A": -9.2265625, |
| "B": -8.8125, |
| "C": -10.7890625, |
| "D": -11.3359375, |
| "E": -9.8671875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.296875, |
| "scores": { |
| "A": -6.6015625, |
| "B": -7.9453125, |
| "C": -10.0703125, |
| "D": -8.8984375, |
| "E": -8.984375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-212", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.5078125, |
| "scores": { |
| "A": -11.59375, |
| "B": -9.609375, |
| "C": -11.1171875, |
| "D": -11.7421875, |
| "E": -12.8359375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.390625, |
| "scores": { |
| "A": -4.984375, |
| "B": -8.375, |
| "C": -11.28125, |
| "D": -9.4140625, |
| "E": -11.890625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-213", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.734375, |
| "scores": { |
| "A": -13.40625, |
| "B": -12.515625, |
| "C": -15.25, |
| "D": -15.03125, |
| "E": -13.96875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.60546875, |
| "scores": { |
| "A": -7.22265625, |
| "B": -11.375, |
| "C": -13.828125, |
| "D": -12.1875, |
| "E": -12.0625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-214", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -3.265625, |
| "scores": { |
| "A": -10.734375, |
| "B": -9.25, |
| "C": -8.390625, |
| "D": -11.65625, |
| "E": -11.65625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.6640625, |
| "scores": { |
| "A": -7.3203125, |
| "B": -10.78125, |
| "C": -10.140625, |
| "D": -9.6875, |
| "E": -10.984375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-215", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -6.78125, |
| "scores": { |
| "A": -23.0, |
| "B": -18.140625, |
| "C": -16.21875, |
| "D": -22.125, |
| "E": -22.1875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.6640625, |
| "scores": { |
| "A": -8.2734375, |
| "B": -9.5625, |
| "C": -8.9375, |
| "D": -11.0078125, |
| "E": -11.5234375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-216", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.640625, |
| "scores": { |
| "A": -12.78125, |
| "B": -9.7421875, |
| "C": -10.3828125, |
| "D": -12.75, |
| "E": -12.796875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.94140625, |
| "scores": { |
| "A": -7.49609375, |
| "B": -9.6796875, |
| "C": -10.4375, |
| "D": -10.078125, |
| "E": -10.5 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-217", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.421875, |
| "scores": { |
| "A": -12.84375, |
| "B": -8.609375, |
| "C": -12.046875, |
| "D": -15.5625, |
| "E": -12.03125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.375, |
| "scores": { |
| "A": -6.671875, |
| "B": -7.4453125, |
| "C": -9.890625, |
| "D": -10.3359375, |
| "E": -9.046875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-218", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.453125, |
| "scores": { |
| "A": -9.890625, |
| "B": -10.34375, |
| "C": -11.4609375, |
| "D": -12.203125, |
| "E": -11.3203125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 2.921875, |
| "scores": { |
| "A": -6.703125, |
| "B": -9.625, |
| "C": -12.21875, |
| "D": -12.3125, |
| "E": -12.875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-219", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.8359375, |
| "scores": { |
| "A": -10.765625, |
| "B": -10.7578125, |
| "C": -11.59375, |
| "D": -11.3125, |
| "E": -10.90625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -5.4140625, |
| "scores": { |
| "A": -6.5859375, |
| "B": -10.734375, |
| "C": -12.0, |
| "D": -12.125, |
| "E": -13.234375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-220", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.40625, |
| "scores": { |
| "A": -9.5625, |
| "B": -10.84375, |
| "C": -12.40625, |
| "D": -12.21875, |
| "E": -10.96875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -8.08203125, |
| "scores": { |
| "A": -6.37890625, |
| "B": -12.828125, |
| "C": -14.328125, |
| "D": -11.8828125, |
| "E": -14.4609375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-221", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.1875, |
| "scores": { |
| "A": -13.28125, |
| "B": -8.890625, |
| "C": -8.703125, |
| "D": -11.203125, |
| "E": -12.5625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.25, |
| "scores": { |
| "A": -7.98828125, |
| "B": -8.1484375, |
| "C": -7.68359375, |
| "D": -9.546875, |
| "E": -7.93359375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-222", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.8828125, |
| "scores": { |
| "A": -10.78125, |
| "B": -9.4921875, |
| "C": -12.015625, |
| "D": -13.375, |
| "E": -14.171875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.6484375, |
| "scores": { |
| "A": -6.34375, |
| "B": -8.3671875, |
| "C": -11.7265625, |
| "D": -10.9921875, |
| "E": -11.59375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-223", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.09375, |
| "scores": { |
| "A": -10.265625, |
| "B": -7.1484375, |
| "C": -10.2421875, |
| "D": -10.921875, |
| "E": -11.3359375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.09765625, |
| "scores": { |
| "A": -4.67578125, |
| "B": -8.7734375, |
| "C": -12.375, |
| "D": -11.1796875, |
| "E": -13.109375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-224", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.328125, |
| "scores": { |
| "A": -9.65625, |
| "B": -10.03125, |
| "C": -10.046875, |
| "D": -12.984375, |
| "E": -13.359375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.765625, |
| "scores": { |
| "A": -6.25, |
| "B": -9.96875, |
| "C": -11.75, |
| "D": -14.015625, |
| "E": -14.109375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-225", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.9609375, |
| "scores": { |
| "A": -9.90625, |
| "B": -7.4609375, |
| "C": -8.71875, |
| "D": -10.421875, |
| "E": -10.09375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.33984375, |
| "scores": { |
| "A": -7.42578125, |
| "B": -8.734375, |
| "C": -9.703125, |
| "D": -8.765625, |
| "E": -10.296875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-226", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.8046875, |
| "scores": { |
| "A": -9.1015625, |
| "B": -8.953125, |
| "C": -8.7265625, |
| "D": -10.828125, |
| "E": -9.53125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.74609375, |
| "scores": { |
| "A": -5.49609375, |
| "B": -8.390625, |
| "C": -9.40625, |
| "D": -8.8828125, |
| "E": -8.2421875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-227", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.2109375, |
| "scores": { |
| "A": -14.609375, |
| "B": -13.5625, |
| "C": -11.3515625, |
| "D": -11.515625, |
| "E": -11.7890625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.7578125, |
| "scores": { |
| "A": -8.953125, |
| "B": -13.7109375, |
| "C": -14.03125, |
| "D": -10.578125, |
| "E": -14.6484375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-228", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.078125, |
| "scores": { |
| "A": -13.3125, |
| "B": -10.71875, |
| "C": -10.796875, |
| "D": -14.0859375, |
| "E": -14.8984375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.28125, |
| "scores": { |
| "A": -7.171875, |
| "B": -8.453125, |
| "C": -9.515625, |
| "D": -10.125, |
| "E": -8.953125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-229", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -4.25, |
| "scores": { |
| "A": -13.28125, |
| "B": -9.953125, |
| "C": -14.203125, |
| "D": -13.296875, |
| "E": -10.6171875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.5546875, |
| "scores": { |
| "A": -7.2265625, |
| "B": -8.6875, |
| "C": -11.78125, |
| "D": -8.859375, |
| "E": -8.71875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-230", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.390625, |
| "scores": { |
| "A": -9.8125, |
| "B": -9.7109375, |
| "C": -10.1015625, |
| "D": -11.03125, |
| "E": -11.2265625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.8515625, |
| "scores": { |
| "A": -7.59375, |
| "B": -10.15625, |
| "C": -10.4453125, |
| "D": -8.6953125, |
| "E": -9.484375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-231", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -1.5703125, |
| "scores": { |
| "A": -14.2109375, |
| "B": -11.6015625, |
| "C": -10.7421875, |
| "D": -14.015625, |
| "E": -12.3125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.26171875, |
| "scores": { |
| "A": -6.23828125, |
| "B": -7.73046875, |
| "C": -7.69921875, |
| "D": -8.921875, |
| "E": -8.5 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-232", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.390625, |
| "scores": { |
| "A": -10.4296875, |
| "B": -8.25, |
| "C": -10.5078125, |
| "D": -10.8671875, |
| "E": -11.640625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.82421875, |
| "scores": { |
| "A": -6.18359375, |
| "B": -8.03125, |
| "C": -9.046875, |
| "D": -8.5078125, |
| "E": -11.0078125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-233", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.7734375, |
| "scores": { |
| "A": -14.265625, |
| "B": -11.4921875, |
| "C": -12.5390625, |
| "D": -12.203125, |
| "E": -13.578125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.4296875, |
| "scores": { |
| "A": -9.125, |
| "B": -9.75, |
| "C": -10.7734375, |
| "D": -9.5546875, |
| "E": -11.28125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-234", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.75, |
| "scores": { |
| "A": -11.015625, |
| "B": -9.265625, |
| "C": -11.203125, |
| "D": -11.375, |
| "E": -11.359375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 3.2421875, |
| "scores": { |
| "A": -5.5859375, |
| "B": -8.828125, |
| "C": -12.875, |
| "D": -9.28125, |
| "E": -11.328125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-235", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.0625, |
| "scores": { |
| "A": -10.9921875, |
| "B": -8.9296875, |
| "C": -10.15625, |
| "D": -11.8828125, |
| "E": -10.40625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.46875, |
| "scores": { |
| "A": -7.265625, |
| "B": -8.734375, |
| "C": -9.3515625, |
| "D": -10.6875, |
| "E": -9.828125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-236", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.4375, |
| "scores": { |
| "A": -10.625, |
| "B": -10.1875, |
| "C": -10.4375, |
| "D": -13.5, |
| "E": -11.234375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 2.484375, |
| "scores": { |
| "A": -7.21875, |
| "B": -9.703125, |
| "C": -9.8125, |
| "D": -12.1015625, |
| "E": -12.265625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-237", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.578125, |
| "scores": { |
| "A": -14.484375, |
| "B": -14.9453125, |
| "C": -18.734375, |
| "D": -17.0625, |
| "E": -17.859375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.2421875, |
| "scores": { |
| "A": -8.6171875, |
| "B": -12.8515625, |
| "C": -15.703125, |
| "D": -11.859375, |
| "E": -14.546875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-238", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.734375, |
| "scores": { |
| "A": -12.953125, |
| "B": -12.265625, |
| "C": -12.90625, |
| "D": -11.53125, |
| "E": -13.203125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.7265625, |
| "scores": { |
| "A": -8.828125, |
| "B": -11.5546875, |
| "C": -12.4140625, |
| "D": -10.3671875, |
| "E": -12.140625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-239", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -1.515625, |
| "scores": { |
| "A": -10.2265625, |
| "B": -9.2734375, |
| "C": -8.7109375, |
| "D": -12.0625, |
| "E": -11.703125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 3.328125, |
| "scores": { |
| "A": -6.8515625, |
| "B": -10.1796875, |
| "C": -11.1796875, |
| "D": -13.3125, |
| "E": -12.7421875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-240", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.4453125, |
| "scores": { |
| "A": -15.140625, |
| "B": -14.28125, |
| "C": -18.34375, |
| "D": -14.2265625, |
| "E": -14.671875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.265625, |
| "scores": { |
| "A": -9.40625, |
| "B": -11.6484375, |
| "C": -14.1875, |
| "D": -11.1015625, |
| "E": -12.671875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-241", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -4.3671875, |
| "scores": { |
| "A": -10.8203125, |
| "B": -8.8828125, |
| "C": -10.421875, |
| "D": -13.25, |
| "E": -12.390625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.8359375, |
| "scores": { |
| "A": -8.546875, |
| "B": -12.5546875, |
| "C": -12.1875, |
| "D": -12.3828125, |
| "E": -11.40625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-242", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.8828125, |
| "scores": { |
| "A": -9.3046875, |
| "B": -10.8671875, |
| "C": -12.1875, |
| "D": -12.9375, |
| "E": -11.6875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.625, |
| "scores": { |
| "A": -6.5625, |
| "B": -11.6875, |
| "C": -14.1875, |
| "D": -14.421875, |
| "E": -14.375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-243", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -6.3671875, |
| "scores": { |
| "A": -15.640625, |
| "B": -9.2734375, |
| "C": -11.265625, |
| "D": -13.8515625, |
| "E": -12.7578125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.5078125, |
| "scores": { |
| "A": -8.6640625, |
| "B": -7.15625, |
| "C": -9.625, |
| "D": -11.8359375, |
| "E": -12.53125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-244", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -2.359375, |
| "scores": { |
| "A": -13.7421875, |
| "B": -12.625, |
| "C": -11.3828125, |
| "D": -12.9453125, |
| "E": -12.703125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 3.046875, |
| "scores": { |
| "A": -6.3125, |
| "B": -9.359375, |
| "C": -10.09375, |
| "D": -10.8984375, |
| "E": -12.4375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-245", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.359375, |
| "scores": { |
| "A": -11.796875, |
| "B": -11.140625, |
| "C": -11.5, |
| "D": -13.34375, |
| "E": -13.1171875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.48046875, |
| "scores": { |
| "A": -7.41015625, |
| "B": -13.3671875, |
| "C": -13.890625, |
| "D": -14.0625, |
| "E": -15.78125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-246", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -1.203125, |
| "scores": { |
| "A": -13.828125, |
| "B": -14.8046875, |
| "C": -15.125, |
| "D": -14.2890625, |
| "E": -13.6015625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.328125, |
| "scores": { |
| "A": -9.640625, |
| "B": -13.96875, |
| "C": -16.53125, |
| "D": -12.234375, |
| "E": -13.359375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-247", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -4.484375, |
| "scores": { |
| "A": -15.078125, |
| "B": -10.59375, |
| "C": -13.671875, |
| "D": -13.515625, |
| "E": -13.796875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.890625, |
| "scores": { |
| "A": -9.8046875, |
| "B": -11.3828125, |
| "C": -12.125, |
| "D": -10.6953125, |
| "E": -12.6640625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-248", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -3.4921875, |
| "scores": { |
| "A": -14.015625, |
| "B": -13.1328125, |
| "C": -13.9140625, |
| "D": -15.734375, |
| "E": -12.2421875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.1875, |
| "scores": { |
| "A": -8.6875, |
| "B": -9.5703125, |
| "C": -11.8046875, |
| "D": -11.875, |
| "E": -9.7734375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-249", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.828125, |
| "scores": { |
| "A": -10.328125, |
| "B": -8.859375, |
| "C": -8.03125, |
| "D": -11.640625, |
| "E": -11.109375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.59375, |
| "scores": { |
| "A": -5.9921875, |
| "B": -7.609375, |
| "C": -8.5859375, |
| "D": -8.4375, |
| "E": -8.6796875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-250", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -1.984375, |
| "scores": { |
| "A": -10.4921875, |
| "B": -11.6015625, |
| "C": -9.9921875, |
| "D": -14.578125, |
| "E": -11.9765625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.6015625, |
| "scores": { |
| "A": -8.0546875, |
| "B": -11.5625, |
| "C": -12.609375, |
| "D": -14.765625, |
| "E": -15.65625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-251", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.078125, |
| "scores": { |
| "A": -9.90625, |
| "B": -9.828125, |
| "C": -12.40625, |
| "D": -11.265625, |
| "E": -10.703125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.984375, |
| "scores": { |
| "A": -9.296875, |
| "B": -12.28125, |
| "C": -13.5, |
| "D": -12.09375, |
| "E": -11.140625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-252", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -0.1953125, |
| "scores": { |
| "A": -10.3671875, |
| "B": -9.515625, |
| "C": -9.65625, |
| "D": -10.5859375, |
| "E": -9.4609375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.25390625, |
| "scores": { |
| "A": -6.80859375, |
| "B": -8.46875, |
| "C": -9.0625, |
| "D": -9.125, |
| "E": -8.734375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-253", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.15625, |
| "scores": { |
| "A": -12.359375, |
| "B": -10.0625, |
| "C": -10.921875, |
| "D": -12.96875, |
| "E": -13.21875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.12890625, |
| "scores": { |
| "A": -4.81640625, |
| "B": -6.953125, |
| "C": -8.9140625, |
| "D": -9.578125, |
| "E": -11.9453125 |
| } |
| } |
| } |
| ], |
| "alpha_sweep_summary_on_flipset": { |
| "0.0": { |
| "n": 42, |
| "flip_rate": 0.0, |
| "ablated_acc": 1.0, |
| "pred_change_rate": 0.0, |
| "mean_margin": 0.9223400354385376, |
| "median_margin": 0.671875, |
| "mean_delta_margin_vs_baseline": 0.0, |
| "median_delta_margin_vs_baseline": 0.0 |
| }, |
| "0.05": { |
| "n": 42, |
| "flip_rate": 0.047619047619047616, |
| "ablated_acc": 0.9523809523809523, |
| "pred_change_rate": 0.047619047619047616, |
| "mean_margin": 0.918154776096344, |
| "median_margin": 0.69140625, |
| "mean_delta_margin_vs_baseline": -0.004185267724096775, |
| "median_delta_margin_vs_baseline": 0.0 |
| }, |
| "0.1": { |
| "n": 42, |
| "flip_rate": 0.09523809523809523, |
| "ablated_acc": 0.9047619047619048, |
| "pred_change_rate": 0.09523809523809523, |
| "mean_margin": 0.9194568395614624, |
| "median_margin": 0.66796875, |
| "mean_delta_margin_vs_baseline": -0.0028831844683736563, |
| "median_delta_margin_vs_baseline": 0.0 |
| }, |
| "0.2": { |
| "n": 42, |
| "flip_rate": 0.09523809523809523, |
| "ablated_acc": 0.9047619047619048, |
| "pred_change_rate": 0.09523809523809523, |
| "mean_margin": 0.919549822807312, |
| "median_margin": 0.64453125, |
| "mean_delta_margin_vs_baseline": -0.0027901786379516125, |
| "median_delta_margin_vs_baseline": -0.0078125 |
| }, |
| "0.5": { |
| "n": 42, |
| "flip_rate": 0.2857142857142857, |
| "ablated_acc": 0.7142857142857143, |
| "pred_change_rate": 0.2857142857142857, |
| "mean_margin": 0.603143572807312, |
| "median_margin": 0.37109375, |
| "mean_delta_margin_vs_baseline": -0.3191964328289032, |
| "median_delta_margin_vs_baseline": -0.34765625 |
| }, |
| "1.0": { |
| "n": 42, |
| "flip_rate": 1.0, |
| "ablated_acc": 0.0, |
| "pred_change_rate": 1.0, |
| "mean_margin": -2.7706472873687744, |
| "median_margin": -2.6171875, |
| "mean_delta_margin_vs_baseline": -3.6929874420166016, |
| "median_delta_margin_vs_baseline": -3.314453125 |
| } |
| }, |
| "alpha_sweep_rows_by_alpha": { |
| "0.0": [ |
| { |
| "ex_id": "aqua-test-2", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2578125, |
| "scores": { |
| "A": -11.234375, |
| "B": -10.2109375, |
| "C": -13.171875, |
| "D": -12.4453125, |
| "E": -10.46875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2578125, |
| "scores": { |
| "A": -11.234375, |
| "B": -10.2109375, |
| "C": -13.171875, |
| "D": -12.4453125, |
| "E": -10.46875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-5", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953125, |
| "scores": { |
| "A": -11.9921875, |
| "B": -10.9765625, |
| "C": -12.0390625, |
| "D": -11.9609375, |
| "E": -11.9296875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953125, |
| "scores": { |
| "A": -11.9921875, |
| "B": -10.9765625, |
| "C": -12.0390625, |
| "D": -11.9609375, |
| "E": -11.9296875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-9", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.078125, |
| "scores": { |
| "A": -11.265625, |
| "B": -8.890625, |
| "C": -9.96875, |
| "D": -12.359375, |
| "E": -13.9921875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.078125, |
| "scores": { |
| "A": -11.265625, |
| "B": -8.890625, |
| "C": -9.96875, |
| "D": -12.359375, |
| "E": -13.9921875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-15", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.671875, |
| "scores": { |
| "A": -11.078125, |
| "B": -10.40625, |
| "C": -13.625, |
| "D": -15.3125, |
| "E": -13.8125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.671875, |
| "scores": { |
| "A": -11.078125, |
| "B": -10.40625, |
| "C": -13.625, |
| "D": -15.3125, |
| "E": -13.8125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-16", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 2.80078125, |
| "scores": { |
| "A": -12.484375, |
| "B": -10.515625, |
| "C": -7.71484375, |
| "D": -12.859375, |
| "E": -12.8125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 2.80078125, |
| "scores": { |
| "A": -12.484375, |
| "B": -10.515625, |
| "C": -7.71484375, |
| "D": -12.859375, |
| "E": -12.8125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-21", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.765625, |
| "scores": { |
| "A": -10.4140625, |
| "B": -9.6484375, |
| "C": -12.5546875, |
| "D": -12.234375, |
| "E": -11.3828125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.765625, |
| "scores": { |
| "A": -10.4140625, |
| "B": -9.6484375, |
| "C": -12.5546875, |
| "D": -12.234375, |
| "E": -11.3828125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-25", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.0546875, |
| "scores": { |
| "A": -12.953125, |
| "B": -12.2578125, |
| "C": -12.203125, |
| "D": -12.4140625, |
| "E": -13.6328125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.0546875, |
| "scores": { |
| "A": -12.953125, |
| "B": -12.2578125, |
| "C": -12.203125, |
| "D": -12.4140625, |
| "E": -13.6328125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-33", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.1875, |
| "scores": { |
| "A": -17.28125, |
| "B": -18.1875, |
| "C": -16.09375, |
| "D": -19.15625, |
| "E": -19.46875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.1875, |
| "scores": { |
| "A": -17.28125, |
| "B": -18.1875, |
| "C": -16.09375, |
| "D": -19.15625, |
| "E": -19.46875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-39", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.71875, |
| "scores": { |
| "A": -10.2265625, |
| "B": -11.9453125, |
| "C": -12.1484375, |
| "D": -14.3125, |
| "E": -14.015625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.71875, |
| "scores": { |
| "A": -10.2265625, |
| "B": -11.9453125, |
| "C": -12.1484375, |
| "D": -14.3125, |
| "E": -14.015625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-47", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.203125, |
| "scores": { |
| "A": -11.9453125, |
| "B": -12.5, |
| "C": -12.1171875, |
| "D": -13.046875, |
| "E": -11.7421875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.203125, |
| "scores": { |
| "A": -11.9453125, |
| "B": -12.5, |
| "C": -12.1171875, |
| "D": -13.046875, |
| "E": -11.7421875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-52", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.0625, |
| "scores": { |
| "A": -12.890625, |
| "B": -9.8515625, |
| "C": -9.9140625, |
| "D": -11.515625, |
| "E": -10.6875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.0625, |
| "scores": { |
| "A": -12.890625, |
| "B": -9.8515625, |
| "C": -9.9140625, |
| "D": -11.515625, |
| "E": -10.6875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-57", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.15625, |
| "scores": { |
| "A": -13.875, |
| "B": -12.96875, |
| "C": -14.359375, |
| "D": -14.140625, |
| "E": -13.125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.15625, |
| "scores": { |
| "A": -13.875, |
| "B": -12.96875, |
| "C": -14.359375, |
| "D": -14.140625, |
| "E": -13.125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-68", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6875, |
| "scores": { |
| "A": -11.65625, |
| "B": -10.96875, |
| "C": -11.875, |
| "D": -12.078125, |
| "E": -12.640625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6875, |
| "scores": { |
| "A": -11.65625, |
| "B": -10.96875, |
| "C": -11.875, |
| "D": -12.078125, |
| "E": -12.640625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-78", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.078125, |
| "scores": { |
| "A": -12.7890625, |
| "B": -8.3203125, |
| "C": -11.3984375, |
| "D": -13.765625, |
| "E": -13.84375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.078125, |
| "scores": { |
| "A": -12.7890625, |
| "B": -8.3203125, |
| "C": -11.3984375, |
| "D": -13.765625, |
| "E": -13.84375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-87", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.625, |
| "scores": { |
| "A": -9.7890625, |
| "B": -9.1640625, |
| "C": -11.234375, |
| "D": -12.0, |
| "E": -11.46875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.625, |
| "scores": { |
| "A": -9.7890625, |
| "B": -9.1640625, |
| "C": -11.234375, |
| "D": -12.0, |
| "E": -11.46875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-100", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.1484375, |
| "scores": { |
| "A": -9.265625, |
| "B": -9.7265625, |
| "C": -9.1171875, |
| "D": -10.0546875, |
| "E": -10.6015625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.1484375, |
| "scores": { |
| "A": -9.265625, |
| "B": -9.7265625, |
| "C": -9.1171875, |
| "D": -10.0546875, |
| "E": -10.6015625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-103", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1484375, |
| "scores": { |
| "A": -9.734375, |
| "B": -8.5234375, |
| "C": -9.6875, |
| "D": -11.4375, |
| "E": -9.671875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1484375, |
| "scores": { |
| "A": -9.734375, |
| "B": -8.5234375, |
| "C": -9.6875, |
| "D": -11.4375, |
| "E": -9.671875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-105", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 3.03125, |
| "scores": { |
| "A": -11.5, |
| "B": -12.0234375, |
| "C": -8.46875, |
| "D": -13.9765625, |
| "E": -13.28125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 3.03125, |
| "scores": { |
| "A": -11.5, |
| "B": -12.0234375, |
| "C": -8.46875, |
| "D": -13.9765625, |
| "E": -13.28125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-111", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.203125, |
| "scores": { |
| "A": -9.796875, |
| "B": -9.2734375, |
| "C": -9.4765625, |
| "D": -10.7578125, |
| "E": -11.4296875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.203125, |
| "scores": { |
| "A": -9.796875, |
| "B": -9.2734375, |
| "C": -9.4765625, |
| "D": -10.7578125, |
| "E": -11.4296875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-116", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.7109375, |
| "scores": { |
| "A": -12.0390625, |
| "B": -9.7421875, |
| "C": -11.453125, |
| "D": -11.5390625, |
| "E": -11.8203125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.7109375, |
| "scores": { |
| "A": -12.0390625, |
| "B": -9.7421875, |
| "C": -11.453125, |
| "D": -11.5390625, |
| "E": -11.8203125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-120", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.3515625, |
| "scores": { |
| "A": -12.625, |
| "B": -10.171875, |
| "C": -10.5234375, |
| "D": -11.96875, |
| "E": -12.625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.3515625, |
| "scores": { |
| "A": -12.625, |
| "B": -10.171875, |
| "C": -10.5234375, |
| "D": -11.96875, |
| "E": -12.625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-122", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.671875, |
| "scores": { |
| "A": -11.09375, |
| "B": -10.421875, |
| "C": -13.25, |
| "D": -13.296875, |
| "E": -13.5 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.671875, |
| "scores": { |
| "A": -11.09375, |
| "B": -10.421875, |
| "C": -13.25, |
| "D": -13.296875, |
| "E": -13.5 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-123", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.9296875, |
| "scores": { |
| "A": -12.8125, |
| "B": -13.265625, |
| "C": -10.09375, |
| "D": -12.0234375, |
| "E": -12.84375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.9296875, |
| "scores": { |
| "A": -12.8125, |
| "B": -13.265625, |
| "C": -10.09375, |
| "D": -12.0234375, |
| "E": -12.84375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-125", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.25, |
| "scores": { |
| "A": -12.8984375, |
| "B": -12.015625, |
| "C": -10.3671875, |
| "D": -10.6171875, |
| "E": -11.515625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.25, |
| "scores": { |
| "A": -12.8984375, |
| "B": -12.015625, |
| "C": -10.3671875, |
| "D": -10.6171875, |
| "E": -11.515625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-130", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.5625, |
| "scores": { |
| "A": -12.53125, |
| "B": -11.625, |
| "C": -14.40625, |
| "D": -11.0625, |
| "E": -12.203125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.5625, |
| "scores": { |
| "A": -12.53125, |
| "B": -11.625, |
| "C": -14.40625, |
| "D": -11.0625, |
| "E": -12.203125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-140", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.09375, |
| "scores": { |
| "A": -12.171875, |
| "B": -10.953125, |
| "C": -12.484375, |
| "D": -12.046875, |
| "E": -12.828125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.09375, |
| "scores": { |
| "A": -12.171875, |
| "B": -10.953125, |
| "C": -12.484375, |
| "D": -12.046875, |
| "E": -12.828125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-141", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.390625, |
| "scores": { |
| "A": -15.65625, |
| "B": -14.0, |
| "C": -12.3359375, |
| "D": -12.7265625, |
| "E": -13.421875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.390625, |
| "scores": { |
| "A": -15.65625, |
| "B": -14.0, |
| "C": -12.3359375, |
| "D": -12.7265625, |
| "E": -13.421875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-148", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.09375, |
| "scores": { |
| "A": -11.84375, |
| "B": -8.6875, |
| "C": -9.390625, |
| "D": -8.59375, |
| "E": -10.328125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.09375, |
| "scores": { |
| "A": -11.84375, |
| "B": -8.6875, |
| "C": -9.390625, |
| "D": -8.59375, |
| "E": -10.328125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-152", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.65625, |
| "scores": { |
| "A": -12.15625, |
| "B": -11.09375, |
| "C": -11.75, |
| "D": -11.765625, |
| "E": -11.75 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.65625, |
| "scores": { |
| "A": -12.15625, |
| "B": -11.09375, |
| "C": -11.75, |
| "D": -11.765625, |
| "E": -11.75 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-167", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 2.28125, |
| "scores": { |
| "A": -13.3125, |
| "B": -10.640625, |
| "C": -12.921875, |
| "D": -16.09375, |
| "E": -14.75 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 2.28125, |
| "scores": { |
| "A": -13.3125, |
| "B": -10.640625, |
| "C": -12.921875, |
| "D": -16.09375, |
| "E": -14.75 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-178", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.4609375, |
| "scores": { |
| "A": -13.1953125, |
| "B": -11.1015625, |
| "C": -12.8203125, |
| "D": -12.625, |
| "E": -10.640625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.4609375, |
| "scores": { |
| "A": -13.1953125, |
| "B": -11.1015625, |
| "C": -12.8203125, |
| "D": -12.625, |
| "E": -10.640625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-181", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7734375, |
| "scores": { |
| "A": -9.6328125, |
| "B": -8.859375, |
| "C": -11.828125, |
| "D": -11.640625, |
| "E": -10.6875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7734375, |
| "scores": { |
| "A": -9.6328125, |
| "B": -8.859375, |
| "C": -11.828125, |
| "D": -11.640625, |
| "E": -10.6875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-183", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.40625, |
| "scores": { |
| "A": -12.6484375, |
| "B": -9.3671875, |
| "C": -10.7734375, |
| "D": -13.140625, |
| "E": -13.125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.40625, |
| "scores": { |
| "A": -12.6484375, |
| "B": -9.3671875, |
| "C": -10.7734375, |
| "D": -13.140625, |
| "E": -13.125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-189", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2109375, |
| "scores": { |
| "A": -14.5390625, |
| "B": -11.546875, |
| "C": -11.8046875, |
| "D": -11.7578125, |
| "E": -13.34375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2109375, |
| "scores": { |
| "A": -14.5390625, |
| "B": -11.546875, |
| "C": -11.8046875, |
| "D": -11.7578125, |
| "E": -13.34375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-190", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.34375, |
| "scores": { |
| "A": -13.6015625, |
| "B": -10.7734375, |
| "C": -10.4296875, |
| "D": -13.3671875, |
| "E": -13.9296875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.34375, |
| "scores": { |
| "A": -13.6015625, |
| "B": -10.7734375, |
| "C": -10.4296875, |
| "D": -13.3671875, |
| "E": -13.9296875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-191", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953125, |
| "scores": { |
| "A": -11.890625, |
| "B": -10.9375, |
| "C": -13.640625, |
| "D": -14.109375, |
| "E": -13.765625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953125, |
| "scores": { |
| "A": -11.890625, |
| "B": -10.9375, |
| "C": -13.640625, |
| "D": -14.109375, |
| "E": -13.765625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-206", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.1796875, |
| "scores": { |
| "A": -11.6171875, |
| "B": -10.96875, |
| "C": -11.1484375, |
| "D": -12.84375, |
| "E": -14.0 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.1796875, |
| "scores": { |
| "A": -11.6171875, |
| "B": -10.96875, |
| "C": -11.1484375, |
| "D": -12.84375, |
| "E": -14.0 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-212", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.5078125, |
| "scores": { |
| "A": -11.59375, |
| "B": -9.609375, |
| "C": -11.1171875, |
| "D": -11.7421875, |
| "E": -12.8359375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.5078125, |
| "scores": { |
| "A": -11.59375, |
| "B": -9.609375, |
| "C": -11.1171875, |
| "D": -11.7421875, |
| "E": -12.8359375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-223", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.09375, |
| "scores": { |
| "A": -10.265625, |
| "B": -7.1484375, |
| "C": -10.2421875, |
| "D": -10.921875, |
| "E": -11.3359375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.09375, |
| "scores": { |
| "A": -10.265625, |
| "B": -7.1484375, |
| "C": -10.2421875, |
| "D": -10.921875, |
| "E": -11.3359375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-228", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.078125, |
| "scores": { |
| "A": -13.3125, |
| "B": -10.71875, |
| "C": -10.796875, |
| "D": -14.0859375, |
| "E": -14.8984375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.078125, |
| "scores": { |
| "A": -13.3125, |
| "B": -10.71875, |
| "C": -10.796875, |
| "D": -14.0859375, |
| "E": -14.8984375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-249", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.828125, |
| "scores": { |
| "A": -10.328125, |
| "B": -8.859375, |
| "C": -8.03125, |
| "D": -11.640625, |
| "E": -11.109375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.828125, |
| "scores": { |
| "A": -10.328125, |
| "B": -8.859375, |
| "C": -8.03125, |
| "D": -11.640625, |
| "E": -11.109375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-251", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.078125, |
| "scores": { |
| "A": -9.90625, |
| "B": -9.828125, |
| "C": -12.40625, |
| "D": -11.265625, |
| "E": -10.703125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.078125, |
| "scores": { |
| "A": -9.90625, |
| "B": -9.828125, |
| "C": -12.40625, |
| "D": -11.265625, |
| "E": -10.703125 |
| } |
| }, |
| "flip": false |
| } |
| ], |
| "0.05": [ |
| { |
| "ex_id": "aqua-test-2", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2578125, |
| "scores": { |
| "A": -11.234375, |
| "B": -10.2109375, |
| "C": -13.171875, |
| "D": -12.4453125, |
| "E": -10.46875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.328125, |
| "scores": { |
| "A": -11.1015625, |
| "B": -10.0703125, |
| "C": -13.03125, |
| "D": -12.34375, |
| "E": -10.3984375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-5", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953125, |
| "scores": { |
| "A": -11.9921875, |
| "B": -10.9765625, |
| "C": -12.0390625, |
| "D": -11.9609375, |
| "E": -11.9296875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953125, |
| "scores": { |
| "A": -12.0, |
| "B": -10.96875, |
| "C": -12.109375, |
| "D": -11.953125, |
| "E": -11.921875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-9", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.078125, |
| "scores": { |
| "A": -11.265625, |
| "B": -8.890625, |
| "C": -9.96875, |
| "D": -12.359375, |
| "E": -13.9921875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.078125, |
| "scores": { |
| "A": -11.2421875, |
| "B": -8.8671875, |
| "C": -9.9453125, |
| "D": -12.3359375, |
| "E": -14.078125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-15", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.671875, |
| "scores": { |
| "A": -11.078125, |
| "B": -10.40625, |
| "C": -13.625, |
| "D": -15.3125, |
| "E": -13.8125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.734375, |
| "scores": { |
| "A": -11.03125, |
| "B": -10.296875, |
| "C": -13.5546875, |
| "D": -15.3359375, |
| "E": -13.8515625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-16", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 2.80078125, |
| "scores": { |
| "A": -12.484375, |
| "B": -10.515625, |
| "C": -7.71484375, |
| "D": -12.859375, |
| "E": -12.8125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 2.8671875, |
| "scores": { |
| "A": -12.484375, |
| "B": -10.515625, |
| "C": -7.6484375, |
| "D": -12.84375, |
| "E": -12.84375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-21", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.765625, |
| "scores": { |
| "A": -10.4140625, |
| "B": -9.6484375, |
| "C": -12.5546875, |
| "D": -12.234375, |
| "E": -11.3828125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.875, |
| "scores": { |
| "A": -10.3828125, |
| "B": -9.5078125, |
| "C": -12.5, |
| "D": -12.203125, |
| "E": -11.359375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-25", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.0546875, |
| "scores": { |
| "A": -12.953125, |
| "B": -12.2578125, |
| "C": -12.203125, |
| "D": -12.4140625, |
| "E": -13.6328125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.0078125, |
| "scores": { |
| "A": -12.8828125, |
| "B": -12.09375, |
| "C": -12.0859375, |
| "D": -12.3125, |
| "E": -13.4921875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-33", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.1875, |
| "scores": { |
| "A": -17.28125, |
| "B": -18.1875, |
| "C": -16.09375, |
| "D": -19.15625, |
| "E": -19.46875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.15625, |
| "scores": { |
| "A": -17.0625, |
| "B": -17.921875, |
| "C": -15.90625, |
| "D": -18.84375, |
| "E": -19.140625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-39", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.71875, |
| "scores": { |
| "A": -10.2265625, |
| "B": -11.9453125, |
| "C": -12.1484375, |
| "D": -14.3125, |
| "E": -14.015625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.6796875, |
| "scores": { |
| "A": -10.125, |
| "B": -11.8046875, |
| "C": -12.0859375, |
| "D": -14.1171875, |
| "E": -13.7890625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-47", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.203125, |
| "scores": { |
| "A": -11.9453125, |
| "B": -12.5, |
| "C": -12.1171875, |
| "D": -13.046875, |
| "E": -11.7421875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.21875, |
| "scores": { |
| "A": -12.0234375, |
| "B": -12.5625, |
| "C": -12.1640625, |
| "D": -13.09375, |
| "E": -11.8046875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-52", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.0625, |
| "scores": { |
| "A": -12.890625, |
| "B": -9.8515625, |
| "C": -9.9140625, |
| "D": -11.515625, |
| "E": -10.6875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.1015625, |
| "scores": { |
| "A": -12.828125, |
| "B": -9.765625, |
| "C": -9.8671875, |
| "D": -11.4921875, |
| "E": -10.640625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-57", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.15625, |
| "scores": { |
| "A": -13.875, |
| "B": -12.96875, |
| "C": -14.359375, |
| "D": -14.140625, |
| "E": -13.125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.1484375, |
| "scores": { |
| "A": -13.8515625, |
| "B": -12.9296875, |
| "C": -14.296875, |
| "D": -14.078125, |
| "E": -13.078125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-68", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6875, |
| "scores": { |
| "A": -11.65625, |
| "B": -10.96875, |
| "C": -11.875, |
| "D": -12.078125, |
| "E": -12.640625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6640625, |
| "scores": { |
| "A": -11.546875, |
| "B": -10.8828125, |
| "C": -11.796875, |
| "D": -12.046875, |
| "E": -12.515625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-78", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.078125, |
| "scores": { |
| "A": -12.7890625, |
| "B": -8.3203125, |
| "C": -11.3984375, |
| "D": -13.765625, |
| "E": -13.84375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.078125, |
| "scores": { |
| "A": -12.734375, |
| "B": -8.2890625, |
| "C": -11.3671875, |
| "D": -13.6875, |
| "E": -13.8515625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-87", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.625, |
| "scores": { |
| "A": -9.7890625, |
| "B": -9.1640625, |
| "C": -11.234375, |
| "D": -12.0, |
| "E": -11.46875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.65625, |
| "scores": { |
| "A": -9.71875, |
| "B": -9.0625, |
| "C": -11.1875, |
| "D": -11.9296875, |
| "E": -11.390625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-100", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.1484375, |
| "scores": { |
| "A": -9.265625, |
| "B": -9.7265625, |
| "C": -9.1171875, |
| "D": -10.0546875, |
| "E": -10.6015625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.140625, |
| "scores": { |
| "A": -9.2109375, |
| "B": -9.6875, |
| "C": -9.0703125, |
| "D": -10.0, |
| "E": -10.609375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-103", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1484375, |
| "scores": { |
| "A": -9.734375, |
| "B": -8.5234375, |
| "C": -9.6875, |
| "D": -11.4375, |
| "E": -9.671875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.125, |
| "scores": { |
| "A": -9.78125, |
| "B": -8.578125, |
| "C": -9.75, |
| "D": -11.46875, |
| "E": -9.703125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-105", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 3.03125, |
| "scores": { |
| "A": -11.5, |
| "B": -12.0234375, |
| "C": -8.46875, |
| "D": -13.9765625, |
| "E": -13.28125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 3.1484375, |
| "scores": { |
| "A": -11.5390625, |
| "B": -12.0078125, |
| "C": -8.390625, |
| "D": -13.984375, |
| "E": -13.3125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-111", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.203125, |
| "scores": { |
| "A": -9.796875, |
| "B": -9.2734375, |
| "C": -9.4765625, |
| "D": -10.7578125, |
| "E": -11.4296875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.203125, |
| "scores": { |
| "A": -9.796875, |
| "B": -9.265625, |
| "C": -9.46875, |
| "D": -10.796875, |
| "E": -11.421875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-116", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.7109375, |
| "scores": { |
| "A": -12.0390625, |
| "B": -9.7421875, |
| "C": -11.453125, |
| "D": -11.5390625, |
| "E": -11.8203125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.78125, |
| "scores": { |
| "A": -11.9375, |
| "B": -9.59375, |
| "C": -11.375, |
| "D": -11.421875, |
| "E": -11.75 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-120", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.3515625, |
| "scores": { |
| "A": -12.625, |
| "B": -10.171875, |
| "C": -10.5234375, |
| "D": -11.96875, |
| "E": -12.625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.28125, |
| "scores": { |
| "A": -12.5625, |
| "B": -10.0859375, |
| "C": -10.3671875, |
| "D": -11.875, |
| "E": -12.546875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-122", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.671875, |
| "scores": { |
| "A": -11.09375, |
| "B": -10.421875, |
| "C": -13.25, |
| "D": -13.296875, |
| "E": -13.5 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.71875, |
| "scores": { |
| "A": -11.140625, |
| "B": -10.421875, |
| "C": -13.28125, |
| "D": -13.3125, |
| "E": -13.484375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-123", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.9296875, |
| "scores": { |
| "A": -12.8125, |
| "B": -13.265625, |
| "C": -10.09375, |
| "D": -12.0234375, |
| "E": -12.84375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.9921875, |
| "scores": { |
| "A": -12.84375, |
| "B": -13.3671875, |
| "C": -10.0703125, |
| "D": -12.0625, |
| "E": -12.8359375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-125", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.25, |
| "scores": { |
| "A": -12.8984375, |
| "B": -12.015625, |
| "C": -10.3671875, |
| "D": -10.6171875, |
| "E": -11.515625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.09375, |
| "scores": { |
| "A": -12.96875, |
| "B": -12.0703125, |
| "C": -10.5859375, |
| "D": -10.6796875, |
| "E": -11.546875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-130", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.5625, |
| "scores": { |
| "A": -12.53125, |
| "B": -11.625, |
| "C": -14.40625, |
| "D": -11.0625, |
| "E": -12.203125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.5, |
| "scores": { |
| "A": -12.453125, |
| "B": -11.46875, |
| "C": -14.265625, |
| "D": -10.96875, |
| "E": -12.125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-140", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.09375, |
| "scores": { |
| "A": -12.171875, |
| "B": -10.953125, |
| "C": -12.484375, |
| "D": -12.046875, |
| "E": -12.828125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.125, |
| "scores": { |
| "A": -12.015625, |
| "B": -10.8125, |
| "C": -12.3125, |
| "D": -11.9375, |
| "E": -12.71875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-141", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.390625, |
| "scores": { |
| "A": -15.65625, |
| "B": -14.0, |
| "C": -12.3359375, |
| "D": -12.7265625, |
| "E": -13.421875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.359375, |
| "scores": { |
| "A": -15.5625, |
| "B": -13.9375, |
| "C": -12.3359375, |
| "D": -12.6953125, |
| "E": -13.3359375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-148", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.09375, |
| "scores": { |
| "A": -11.84375, |
| "B": -8.6875, |
| "C": -9.390625, |
| "D": -8.59375, |
| "E": -10.328125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.015625, |
| "scores": { |
| "A": -11.65625, |
| "B": -8.40625, |
| "C": -9.1875, |
| "D": -8.390625, |
| "E": -10.09375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-152", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.65625, |
| "scores": { |
| "A": -12.15625, |
| "B": -11.09375, |
| "C": -11.75, |
| "D": -11.765625, |
| "E": -11.75 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.625, |
| "scores": { |
| "A": -12.03125, |
| "B": -11.015625, |
| "C": -11.6875, |
| "D": -11.703125, |
| "E": -11.640625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-167", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 2.28125, |
| "scores": { |
| "A": -13.3125, |
| "B": -10.640625, |
| "C": -12.921875, |
| "D": -16.09375, |
| "E": -14.75 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 2.28125, |
| "scores": { |
| "A": -13.34375, |
| "B": -10.71875, |
| "C": -13.0, |
| "D": -16.109375, |
| "E": -14.8125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-178", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.4609375, |
| "scores": { |
| "A": -13.1953125, |
| "B": -11.1015625, |
| "C": -12.8203125, |
| "D": -12.625, |
| "E": -10.640625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.53125, |
| "scores": { |
| "A": -13.09375, |
| "B": -11.0, |
| "C": -12.75, |
| "D": -12.4375, |
| "E": -10.46875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-181", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7734375, |
| "scores": { |
| "A": -9.6328125, |
| "B": -8.859375, |
| "C": -11.828125, |
| "D": -11.640625, |
| "E": -10.6875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.71875, |
| "scores": { |
| "A": -9.546875, |
| "B": -8.828125, |
| "C": -11.765625, |
| "D": -11.5859375, |
| "E": -10.625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-183", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.40625, |
| "scores": { |
| "A": -12.6484375, |
| "B": -9.3671875, |
| "C": -10.7734375, |
| "D": -13.140625, |
| "E": -13.125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.359375, |
| "scores": { |
| "A": -12.625, |
| "B": -9.34375, |
| "C": -10.703125, |
| "D": -13.125, |
| "E": -13.109375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-189", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2109375, |
| "scores": { |
| "A": -14.5390625, |
| "B": -11.546875, |
| "C": -11.8046875, |
| "D": -11.7578125, |
| "E": -13.34375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2421875, |
| "scores": { |
| "A": -14.4453125, |
| "B": -11.5078125, |
| "C": -11.7890625, |
| "D": -11.75, |
| "E": -13.25 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-190", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.34375, |
| "scores": { |
| "A": -13.6015625, |
| "B": -10.7734375, |
| "C": -10.4296875, |
| "D": -13.3671875, |
| "E": -13.9296875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.40625, |
| "scores": { |
| "A": -13.609375, |
| "B": -10.765625, |
| "C": -10.359375, |
| "D": -13.375, |
| "E": -14.046875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-191", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953125, |
| "scores": { |
| "A": -11.890625, |
| "B": -10.9375, |
| "C": -13.640625, |
| "D": -14.109375, |
| "E": -13.765625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.984375, |
| "scores": { |
| "A": -11.7734375, |
| "B": -10.7890625, |
| "C": -13.40625, |
| "D": -13.90625, |
| "E": -13.484375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-206", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.1796875, |
| "scores": { |
| "A": -11.6171875, |
| "B": -10.96875, |
| "C": -11.1484375, |
| "D": -12.84375, |
| "E": -14.0 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.0390625, |
| "scores": { |
| "A": -11.625, |
| "B": -11.0625, |
| "C": -11.0234375, |
| "D": -12.8203125, |
| "E": -13.9140625 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-212", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.5078125, |
| "scores": { |
| "A": -11.59375, |
| "B": -9.609375, |
| "C": -11.1171875, |
| "D": -11.7421875, |
| "E": -12.8359375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.4921875, |
| "scores": { |
| "A": -11.53125, |
| "B": -9.5625, |
| "C": -11.0546875, |
| "D": -11.703125, |
| "E": -12.8046875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-223", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.09375, |
| "scores": { |
| "A": -10.265625, |
| "B": -7.1484375, |
| "C": -10.2421875, |
| "D": -10.921875, |
| "E": -11.3359375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.109375, |
| "scores": { |
| "A": -10.3125, |
| "B": -7.109375, |
| "C": -10.21875, |
| "D": -10.9375, |
| "E": -11.4375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-228", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.078125, |
| "scores": { |
| "A": -13.3125, |
| "B": -10.71875, |
| "C": -10.796875, |
| "D": -14.0859375, |
| "E": -14.8984375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.09375, |
| "scores": { |
| "A": -13.2421875, |
| "B": -10.6640625, |
| "C": -10.7578125, |
| "D": -14.0, |
| "E": -14.8125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-249", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.828125, |
| "scores": { |
| "A": -10.328125, |
| "B": -8.859375, |
| "C": -8.03125, |
| "D": -11.640625, |
| "E": -11.109375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.78125, |
| "scores": { |
| "A": -10.375, |
| "B": -8.890625, |
| "C": -8.109375, |
| "D": -11.6875, |
| "E": -11.09375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-251", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.078125, |
| "scores": { |
| "A": -9.90625, |
| "B": -9.828125, |
| "C": -12.40625, |
| "D": -11.265625, |
| "E": -10.703125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.0546875, |
| "scores": { |
| "A": -9.7265625, |
| "B": -9.78125, |
| "C": -12.34375, |
| "D": -11.1875, |
| "E": -10.609375 |
| } |
| }, |
| "flip": true |
| } |
| ], |
| "0.1": [ |
| { |
| "ex_id": "aqua-test-2", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2578125, |
| "scores": { |
| "A": -11.234375, |
| "B": -10.2109375, |
| "C": -13.171875, |
| "D": -12.4453125, |
| "E": -10.46875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.390625, |
| "scores": { |
| "A": -10.921875, |
| "B": -9.890625, |
| "C": -12.859375, |
| "D": -12.21875, |
| "E": -10.28125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-5", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953125, |
| "scores": { |
| "A": -11.9921875, |
| "B": -10.9765625, |
| "C": -12.0390625, |
| "D": -11.9609375, |
| "E": -11.9296875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953125, |
| "scores": { |
| "A": -11.90625, |
| "B": -10.90625, |
| "C": -12.0625, |
| "D": -11.875, |
| "E": -11.859375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-9", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.078125, |
| "scores": { |
| "A": -11.265625, |
| "B": -8.890625, |
| "C": -9.96875, |
| "D": -12.359375, |
| "E": -13.9921875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.078125, |
| "scores": { |
| "A": -11.203125, |
| "B": -8.8359375, |
| "C": -9.9140625, |
| "D": -12.3125, |
| "E": -14.171875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-15", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.671875, |
| "scores": { |
| "A": -11.078125, |
| "B": -10.40625, |
| "C": -13.625, |
| "D": -15.3125, |
| "E": -13.8125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.78125, |
| "scores": { |
| "A": -10.890625, |
| "B": -10.109375, |
| "C": -13.390625, |
| "D": -15.265625, |
| "E": -13.84375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-16", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 2.80078125, |
| "scores": { |
| "A": -12.484375, |
| "B": -10.515625, |
| "C": -7.71484375, |
| "D": -12.859375, |
| "E": -12.8125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 2.9375, |
| "scores": { |
| "A": -12.4296875, |
| "B": -10.46875, |
| "C": -7.53125, |
| "D": -12.7734375, |
| "E": -12.8125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-21", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.765625, |
| "scores": { |
| "A": -10.4140625, |
| "B": -9.6484375, |
| "C": -12.5546875, |
| "D": -12.234375, |
| "E": -11.3828125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.9765625, |
| "scores": { |
| "A": -10.34375, |
| "B": -9.3671875, |
| "C": -12.4375, |
| "D": -12.171875, |
| "E": -11.3203125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-25", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.0546875, |
| "scores": { |
| "A": -12.953125, |
| "B": -12.2578125, |
| "C": -12.203125, |
| "D": -12.4140625, |
| "E": -13.6328125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.046875, |
| "scores": { |
| "A": -12.6875, |
| "B": -11.8125, |
| "C": -11.859375, |
| "D": -12.09375, |
| "E": -13.21875 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-33", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.1875, |
| "scores": { |
| "A": -17.28125, |
| "B": -18.1875, |
| "C": -16.09375, |
| "D": -19.15625, |
| "E": -19.46875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.28125, |
| "scores": { |
| "A": -16.671875, |
| "B": -17.34375, |
| "C": -15.390625, |
| "D": -18.234375, |
| "E": -18.515625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-39", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.71875, |
| "scores": { |
| "A": -10.2265625, |
| "B": -11.9453125, |
| "C": -12.1484375, |
| "D": -14.3125, |
| "E": -14.015625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.65625, |
| "scores": { |
| "A": -10.046875, |
| "B": -11.703125, |
| "C": -12.03125, |
| "D": -13.9375, |
| "E": -13.5625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-47", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.203125, |
| "scores": { |
| "A": -11.9453125, |
| "B": -12.5, |
| "C": -12.1171875, |
| "D": -13.046875, |
| "E": -11.7421875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.21875, |
| "scores": { |
| "A": -12.0546875, |
| "B": -12.609375, |
| "C": -12.15625, |
| "D": -13.0859375, |
| "E": -11.8359375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-52", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.0625, |
| "scores": { |
| "A": -12.890625, |
| "B": -9.8515625, |
| "C": -9.9140625, |
| "D": -11.515625, |
| "E": -10.6875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.140625, |
| "scores": { |
| "A": -12.75, |
| "B": -9.65625, |
| "C": -9.796875, |
| "D": -11.46875, |
| "E": -10.59375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-57", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.15625, |
| "scores": { |
| "A": -13.875, |
| "B": -12.96875, |
| "C": -14.359375, |
| "D": -14.140625, |
| "E": -13.125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.140625, |
| "scores": { |
| "A": -13.75, |
| "B": -12.84375, |
| "C": -14.171875, |
| "D": -13.96875, |
| "E": -12.984375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-68", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6875, |
| "scores": { |
| "A": -11.65625, |
| "B": -10.96875, |
| "C": -11.875, |
| "D": -12.078125, |
| "E": -12.640625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.65625, |
| "scores": { |
| "A": -11.4609375, |
| "B": -10.8046875, |
| "C": -11.7265625, |
| "D": -12.015625, |
| "E": -12.390625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-78", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.078125, |
| "scores": { |
| "A": -12.7890625, |
| "B": -8.3203125, |
| "C": -11.3984375, |
| "D": -13.765625, |
| "E": -13.84375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.09375, |
| "scores": { |
| "A": -12.65625, |
| "B": -8.21875, |
| "C": -11.3125, |
| "D": -13.59375, |
| "E": -13.828125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-87", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.625, |
| "scores": { |
| "A": -9.7890625, |
| "B": -9.1640625, |
| "C": -11.234375, |
| "D": -12.0, |
| "E": -11.46875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6640625, |
| "scores": { |
| "A": -9.6015625, |
| "B": -8.9375, |
| "C": -11.125, |
| "D": -11.8359375, |
| "E": -11.296875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-100", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.1484375, |
| "scores": { |
| "A": -9.265625, |
| "B": -9.7265625, |
| "C": -9.1171875, |
| "D": -10.0546875, |
| "E": -10.6015625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.1328125, |
| "scores": { |
| "A": -9.125, |
| "B": -9.609375, |
| "C": -8.9921875, |
| "D": -9.9375, |
| "E": -10.609375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-103", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1484375, |
| "scores": { |
| "A": -9.734375, |
| "B": -8.5234375, |
| "C": -9.6875, |
| "D": -11.4375, |
| "E": -9.671875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1171875, |
| "scores": { |
| "A": -9.796875, |
| "B": -8.6171875, |
| "C": -9.796875, |
| "D": -11.5, |
| "E": -9.734375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-105", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 3.03125, |
| "scores": { |
| "A": -11.5, |
| "B": -12.0234375, |
| "C": -8.46875, |
| "D": -13.9765625, |
| "E": -13.28125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 3.2421875, |
| "scores": { |
| "A": -11.4921875, |
| "B": -11.9296875, |
| "C": -8.25, |
| "D": -13.9375, |
| "E": -13.296875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-111", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.203125, |
| "scores": { |
| "A": -9.796875, |
| "B": -9.2734375, |
| "C": -9.4765625, |
| "D": -10.7578125, |
| "E": -11.4296875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.1953125, |
| "scores": { |
| "A": -9.75, |
| "B": -9.21875, |
| "C": -9.4140625, |
| "D": -10.765625, |
| "E": -11.3671875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-116", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.7109375, |
| "scores": { |
| "A": -12.0390625, |
| "B": -9.7421875, |
| "C": -11.453125, |
| "D": -11.5390625, |
| "E": -11.8203125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.875, |
| "scores": { |
| "A": -11.765625, |
| "B": -9.390625, |
| "C": -11.265625, |
| "D": -11.28125, |
| "E": -11.6328125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-120", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.3515625, |
| "scores": { |
| "A": -12.625, |
| "B": -10.171875, |
| "C": -10.5234375, |
| "D": -11.96875, |
| "E": -12.625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2421875, |
| "scores": { |
| "A": -12.46875, |
| "B": -9.9609375, |
| "C": -10.203125, |
| "D": -11.7578125, |
| "E": -12.4765625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-122", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.671875, |
| "scores": { |
| "A": -11.09375, |
| "B": -10.421875, |
| "C": -13.25, |
| "D": -13.296875, |
| "E": -13.5 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7421875, |
| "scores": { |
| "A": -11.1328125, |
| "B": -10.390625, |
| "C": -13.265625, |
| "D": -13.28125, |
| "E": -13.453125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-123", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.9296875, |
| "scores": { |
| "A": -12.8125, |
| "B": -13.265625, |
| "C": -10.09375, |
| "D": -12.0234375, |
| "E": -12.84375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 2.046875, |
| "scores": { |
| "A": -12.78125, |
| "B": -13.40625, |
| "C": -9.984375, |
| "D": -12.03125, |
| "E": -12.78125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-125", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.25, |
| "scores": { |
| "A": -12.8984375, |
| "B": -12.015625, |
| "C": -10.3671875, |
| "D": -10.6171875, |
| "E": -11.515625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.0, |
| "scores": { |
| "A": -12.96875, |
| "B": -12.03125, |
| "C": -10.6875, |
| "D": -10.6875, |
| "E": -11.53125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-130", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.5625, |
| "scores": { |
| "A": -12.53125, |
| "B": -11.625, |
| "C": -14.40625, |
| "D": -11.0625, |
| "E": -12.203125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.421875, |
| "scores": { |
| "A": -12.3203125, |
| "B": -11.265625, |
| "C": -14.0625, |
| "D": -10.84375, |
| "E": -11.96875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-140", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.09375, |
| "scores": { |
| "A": -12.171875, |
| "B": -10.953125, |
| "C": -12.484375, |
| "D": -12.046875, |
| "E": -12.828125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.15625, |
| "scores": { |
| "A": -11.8125, |
| "B": -10.65625, |
| "C": -12.09375, |
| "D": -11.8125, |
| "E": -12.59375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-141", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.390625, |
| "scores": { |
| "A": -15.65625, |
| "B": -14.0, |
| "C": -12.3359375, |
| "D": -12.7265625, |
| "E": -13.421875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.34375, |
| "scores": { |
| "A": -15.421875, |
| "B": -13.8359375, |
| "C": -12.265625, |
| "D": -12.609375, |
| "E": -13.203125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-148", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.09375, |
| "scores": { |
| "A": -11.84375, |
| "B": -8.6875, |
| "C": -9.390625, |
| "D": -8.59375, |
| "E": -10.328125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.015625, |
| "scores": { |
| "A": -11.453125, |
| "B": -8.171875, |
| "C": -8.96875, |
| "D": -8.1875, |
| "E": -9.875 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-152", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.65625, |
| "scores": { |
| "A": -12.15625, |
| "B": -11.09375, |
| "C": -11.75, |
| "D": -11.765625, |
| "E": -11.75 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.59375, |
| "scores": { |
| "A": -11.8828125, |
| "B": -10.9296875, |
| "C": -11.59375, |
| "D": -11.609375, |
| "E": -11.5234375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-167", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 2.28125, |
| "scores": { |
| "A": -13.3125, |
| "B": -10.640625, |
| "C": -12.921875, |
| "D": -16.09375, |
| "E": -14.75 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 2.2421875, |
| "scores": { |
| "A": -13.265625, |
| "B": -10.7109375, |
| "C": -12.953125, |
| "D": -16.0, |
| "E": -14.765625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-178", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.4609375, |
| "scores": { |
| "A": -13.1953125, |
| "B": -11.1015625, |
| "C": -12.8203125, |
| "D": -12.625, |
| "E": -10.640625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.5546875, |
| "scores": { |
| "A": -12.8984375, |
| "B": -10.78125, |
| "C": -12.5234375, |
| "D": -12.1640625, |
| "E": -10.2265625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-181", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7734375, |
| "scores": { |
| "A": -9.6328125, |
| "B": -8.859375, |
| "C": -11.828125, |
| "D": -11.640625, |
| "E": -10.6875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.671875, |
| "scores": { |
| "A": -9.4296875, |
| "B": -8.7578125, |
| "C": -11.671875, |
| "D": -11.515625, |
| "E": -10.546875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-183", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.40625, |
| "scores": { |
| "A": -12.6484375, |
| "B": -9.3671875, |
| "C": -10.7734375, |
| "D": -13.140625, |
| "E": -13.125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.3125, |
| "scores": { |
| "A": -12.546875, |
| "B": -9.28125, |
| "C": -10.59375, |
| "D": -13.078125, |
| "E": -13.046875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-189", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2109375, |
| "scores": { |
| "A": -14.5390625, |
| "B": -11.546875, |
| "C": -11.8046875, |
| "D": -11.7578125, |
| "E": -13.34375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.28125, |
| "scores": { |
| "A": -14.328125, |
| "B": -11.46875, |
| "C": -11.75, |
| "D": -11.75, |
| "E": -13.125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-190", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.34375, |
| "scores": { |
| "A": -13.6015625, |
| "B": -10.7734375, |
| "C": -10.4296875, |
| "D": -13.3671875, |
| "E": -13.9296875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.421875, |
| "scores": { |
| "A": -13.5078125, |
| "B": -10.6640625, |
| "C": -10.2421875, |
| "D": -13.3046875, |
| "E": -14.078125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-191", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953125, |
| "scores": { |
| "A": -11.890625, |
| "B": -10.9375, |
| "C": -13.640625, |
| "D": -14.109375, |
| "E": -13.765625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.984375, |
| "scores": { |
| "A": -11.59375, |
| "B": -10.609375, |
| "C": -13.15625, |
| "D": -13.6875, |
| "E": -13.25 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-206", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.1796875, |
| "scores": { |
| "A": -11.6171875, |
| "B": -10.96875, |
| "C": -11.1484375, |
| "D": -12.84375, |
| "E": -14.0 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.2109375, |
| "scores": { |
| "A": -11.5703125, |
| "B": -11.109375, |
| "C": -10.8984375, |
| "D": -12.75, |
| "E": -13.8203125 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-212", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.5078125, |
| "scores": { |
| "A": -11.59375, |
| "B": -9.609375, |
| "C": -11.1171875, |
| "D": -11.7421875, |
| "E": -12.8359375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.5, |
| "scores": { |
| "A": -11.421875, |
| "B": -9.4609375, |
| "C": -10.9609375, |
| "D": -11.6328125, |
| "E": -12.75 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-223", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.09375, |
| "scores": { |
| "A": -10.265625, |
| "B": -7.1484375, |
| "C": -10.2421875, |
| "D": -10.921875, |
| "E": -11.3359375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.1328125, |
| "scores": { |
| "A": -10.3359375, |
| "B": -7.0625, |
| "C": -10.1953125, |
| "D": -10.9296875, |
| "E": -11.5 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-228", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.078125, |
| "scores": { |
| "A": -13.3125, |
| "B": -10.71875, |
| "C": -10.796875, |
| "D": -14.0859375, |
| "E": -14.8984375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.1171875, |
| "scores": { |
| "A": -13.1171875, |
| "B": -10.5625, |
| "C": -10.6796875, |
| "D": -13.875, |
| "E": -14.640625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-249", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.828125, |
| "scores": { |
| "A": -10.328125, |
| "B": -8.859375, |
| "C": -8.03125, |
| "D": -11.640625, |
| "E": -11.109375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.765625, |
| "scores": { |
| "A": -10.375, |
| "B": -8.890625, |
| "C": -8.125, |
| "D": -11.703125, |
| "E": -11.0625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-251", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.078125, |
| "scores": { |
| "A": -9.90625, |
| "B": -9.828125, |
| "C": -12.40625, |
| "D": -11.265625, |
| "E": -10.703125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.171875, |
| "scores": { |
| "A": -9.4921875, |
| "B": -9.6640625, |
| "C": -12.203125, |
| "D": -11.0390625, |
| "E": -10.453125 |
| } |
| }, |
| "flip": true |
| } |
| ], |
| "0.2": [ |
| { |
| "ex_id": "aqua-test-2", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2578125, |
| "scores": { |
| "A": -11.234375, |
| "B": -10.2109375, |
| "C": -13.171875, |
| "D": -12.4453125, |
| "E": -10.46875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.578125, |
| "scores": { |
| "A": -10.390625, |
| "B": -9.359375, |
| "C": -12.34375, |
| "D": -11.828125, |
| "E": -9.9375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-5", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953125, |
| "scores": { |
| "A": -11.9921875, |
| "B": -10.9765625, |
| "C": -12.0390625, |
| "D": -11.9609375, |
| "E": -11.9296875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953125, |
| "scores": { |
| "A": -11.6171875, |
| "B": -10.640625, |
| "C": -11.796875, |
| "D": -11.6171875, |
| "E": -11.59375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-9", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.078125, |
| "scores": { |
| "A": -11.265625, |
| "B": -8.890625, |
| "C": -9.96875, |
| "D": -12.359375, |
| "E": -13.9921875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.0859375, |
| "scores": { |
| "A": -10.9921875, |
| "B": -8.6796875, |
| "C": -9.765625, |
| "D": -12.1640625, |
| "E": -14.25 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-15", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.671875, |
| "scores": { |
| "A": -11.078125, |
| "B": -10.40625, |
| "C": -13.625, |
| "D": -15.3125, |
| "E": -13.8125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.8125, |
| "scores": { |
| "A": -10.46875, |
| "B": -9.65625, |
| "C": -12.953125, |
| "D": -15.015625, |
| "E": -13.765625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-16", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 2.80078125, |
| "scores": { |
| "A": -12.484375, |
| "B": -10.515625, |
| "C": -7.71484375, |
| "D": -12.859375, |
| "E": -12.8125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 3.0546875, |
| "scores": { |
| "A": -11.90625, |
| "B": -10.0625, |
| "C": -7.0078125, |
| "D": -12.2265625, |
| "E": -12.40625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-21", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.765625, |
| "scores": { |
| "A": -10.4140625, |
| "B": -9.6484375, |
| "C": -12.5546875, |
| "D": -12.234375, |
| "E": -11.3828125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.171875, |
| "scores": { |
| "A": -10.15625, |
| "B": -8.984375, |
| "C": -12.1875, |
| "D": -12.0546875, |
| "E": -11.234375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-25", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.0546875, |
| "scores": { |
| "A": -12.953125, |
| "B": -12.2578125, |
| "C": -12.203125, |
| "D": -12.4140625, |
| "E": -13.6328125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.109375, |
| "scores": { |
| "A": -12.09375, |
| "B": -11.15625, |
| "C": -11.265625, |
| "D": -11.546875, |
| "E": -12.546875 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-33", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.1875, |
| "scores": { |
| "A": -17.28125, |
| "B": -18.1875, |
| "C": -16.09375, |
| "D": -19.15625, |
| "E": -19.46875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.78125, |
| "scores": { |
| "A": -15.453125, |
| "B": -15.421875, |
| "C": -13.640625, |
| "D": -16.265625, |
| "E": -16.453125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-39", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.71875, |
| "scores": { |
| "A": -10.2265625, |
| "B": -11.9453125, |
| "C": -12.1484375, |
| "D": -14.3125, |
| "E": -14.015625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.59375, |
| "scores": { |
| "A": -9.765625, |
| "B": -11.359375, |
| "C": -11.796875, |
| "D": -13.546875, |
| "E": -13.078125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-47", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.203125, |
| "scores": { |
| "A": -11.9453125, |
| "B": -12.5, |
| "C": -12.1171875, |
| "D": -13.046875, |
| "E": -11.7421875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.1953125, |
| "scores": { |
| "A": -11.9921875, |
| "B": -12.65625, |
| "C": -12.046875, |
| "D": -12.9765625, |
| "E": -11.796875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-52", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.0625, |
| "scores": { |
| "A": -12.890625, |
| "B": -9.8515625, |
| "C": -9.9140625, |
| "D": -11.515625, |
| "E": -10.6875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.15625, |
| "scores": { |
| "A": -12.46875, |
| "B": -9.359375, |
| "C": -9.515625, |
| "D": -11.3125, |
| "E": -10.484375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-57", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.15625, |
| "scores": { |
| "A": -13.875, |
| "B": -12.96875, |
| "C": -14.359375, |
| "D": -14.140625, |
| "E": -13.125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.171875, |
| "scores": { |
| "A": -13.4375, |
| "B": -12.546875, |
| "C": -13.8125, |
| "D": -13.65625, |
| "E": -12.71875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-68", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6875, |
| "scores": { |
| "A": -11.65625, |
| "B": -10.96875, |
| "C": -11.875, |
| "D": -12.078125, |
| "E": -12.640625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6640625, |
| "scores": { |
| "A": -11.21875, |
| "B": -10.5546875, |
| "C": -11.484375, |
| "D": -11.875, |
| "E": -12.0625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-78", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.078125, |
| "scores": { |
| "A": -12.7890625, |
| "B": -8.3203125, |
| "C": -11.3984375, |
| "D": -13.765625, |
| "E": -13.84375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.125, |
| "scores": { |
| "A": -12.3984375, |
| "B": -8.03125, |
| "C": -11.15625, |
| "D": -13.390625, |
| "E": -13.75 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-87", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.625, |
| "scores": { |
| "A": -9.7890625, |
| "B": -9.1640625, |
| "C": -11.234375, |
| "D": -12.0, |
| "E": -11.46875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.625, |
| "scores": { |
| "A": -9.296875, |
| "B": -8.671875, |
| "C": -10.9375, |
| "D": -11.5625, |
| "E": -11.015625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-100", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.1484375, |
| "scores": { |
| "A": -9.265625, |
| "B": -9.7265625, |
| "C": -9.1171875, |
| "D": -10.0546875, |
| "E": -10.6015625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.1015625, |
| "scores": { |
| "A": -8.734375, |
| "B": -9.21875, |
| "C": -8.6328125, |
| "D": -9.640625, |
| "E": -10.421875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-103", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1484375, |
| "scores": { |
| "A": -9.734375, |
| "B": -8.5234375, |
| "C": -9.6875, |
| "D": -11.4375, |
| "E": -9.671875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1328125, |
| "scores": { |
| "A": -9.65625, |
| "B": -8.5234375, |
| "C": -9.734375, |
| "D": -11.484375, |
| "E": -9.75 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-105", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 3.03125, |
| "scores": { |
| "A": -11.5, |
| "B": -12.0234375, |
| "C": -8.46875, |
| "D": -13.9765625, |
| "E": -13.28125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 3.38671875, |
| "scores": { |
| "A": -11.265625, |
| "B": -11.75, |
| "C": -7.87890625, |
| "D": -13.8125, |
| "E": -13.2265625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-111", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.203125, |
| "scores": { |
| "A": -9.796875, |
| "B": -9.2734375, |
| "C": -9.4765625, |
| "D": -10.7578125, |
| "E": -11.4296875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.1796875, |
| "scores": { |
| "A": -9.5859375, |
| "B": -9.0859375, |
| "C": -9.265625, |
| "D": -10.625, |
| "E": -11.1953125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-116", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.7109375, |
| "scores": { |
| "A": -12.0390625, |
| "B": -9.7421875, |
| "C": -11.453125, |
| "D": -11.5390625, |
| "E": -11.8203125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 2.0, |
| "scores": { |
| "A": -11.296875, |
| "B": -8.984375, |
| "C": -11.015625, |
| "D": -10.984375, |
| "E": -11.359375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-120", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.3515625, |
| "scores": { |
| "A": -12.625, |
| "B": -10.171875, |
| "C": -10.5234375, |
| "D": -11.96875, |
| "E": -12.625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.21875, |
| "scores": { |
| "A": -12.203125, |
| "B": -9.6171875, |
| "C": -9.8359375, |
| "D": -11.46875, |
| "E": -12.3125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-122", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.671875, |
| "scores": { |
| "A": -11.09375, |
| "B": -10.421875, |
| "C": -13.25, |
| "D": -13.296875, |
| "E": -13.5 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.734375, |
| "scores": { |
| "A": -10.984375, |
| "B": -10.25, |
| "C": -13.140625, |
| "D": -13.140625, |
| "E": -13.328125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-123", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.9296875, |
| "scores": { |
| "A": -12.8125, |
| "B": -13.265625, |
| "C": -10.09375, |
| "D": -12.0234375, |
| "E": -12.84375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 2.1328125, |
| "scores": { |
| "A": -12.390625, |
| "B": -13.234375, |
| "C": -9.671875, |
| "D": -11.8046875, |
| "E": -12.515625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-125", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.25, |
| "scores": { |
| "A": -12.8984375, |
| "B": -12.015625, |
| "C": -10.3671875, |
| "D": -10.6171875, |
| "E": -11.515625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.015625, |
| "scores": { |
| "A": -12.59375, |
| "B": -11.640625, |
| "C": -10.5, |
| "D": -10.484375, |
| "E": -11.25 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-130", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.5625, |
| "scores": { |
| "A": -12.53125, |
| "B": -11.625, |
| "C": -14.40625, |
| "D": -11.0625, |
| "E": -12.203125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.296875, |
| "scores": { |
| "A": -11.875, |
| "B": -10.78125, |
| "C": -13.421875, |
| "D": -10.484375, |
| "E": -11.34375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-140", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.09375, |
| "scores": { |
| "A": -12.171875, |
| "B": -10.953125, |
| "C": -12.484375, |
| "D": -12.046875, |
| "E": -12.828125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.0078125, |
| "scores": { |
| "A": -11.3984375, |
| "B": -10.390625, |
| "C": -11.703125, |
| "D": -11.6015625, |
| "E": -12.421875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-141", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.390625, |
| "scores": { |
| "A": -15.65625, |
| "B": -14.0, |
| "C": -12.3359375, |
| "D": -12.7265625, |
| "E": -13.421875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.3671875, |
| "scores": { |
| "A": -15.03125, |
| "B": -13.484375, |
| "C": -11.953125, |
| "D": -12.3203125, |
| "E": -12.7890625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-148", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.09375, |
| "scores": { |
| "A": -11.84375, |
| "B": -8.6875, |
| "C": -9.390625, |
| "D": -8.59375, |
| "E": -10.328125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.015625, |
| "scores": { |
| "A": -11.125, |
| "B": -7.8671875, |
| "C": -8.59375, |
| "D": -7.8515625, |
| "E": -9.5 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-152", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.65625, |
| "scores": { |
| "A": -12.15625, |
| "B": -11.09375, |
| "C": -11.75, |
| "D": -11.765625, |
| "E": -11.75 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.546875, |
| "scores": { |
| "A": -11.421875, |
| "B": -10.640625, |
| "C": -11.1875, |
| "D": -11.34375, |
| "E": -11.21875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-167", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 2.28125, |
| "scores": { |
| "A": -13.3125, |
| "B": -10.640625, |
| "C": -12.921875, |
| "D": -16.09375, |
| "E": -14.75 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 2.1171875, |
| "scores": { |
| "A": -12.859375, |
| "B": -10.4921875, |
| "C": -12.609375, |
| "D": -15.53125, |
| "E": -14.4765625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-178", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.4609375, |
| "scores": { |
| "A": -13.1953125, |
| "B": -11.1015625, |
| "C": -12.8203125, |
| "D": -12.625, |
| "E": -10.640625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.515625, |
| "scores": { |
| "A": -12.34375, |
| "B": -10.1875, |
| "C": -11.78125, |
| "D": -11.53125, |
| "E": -9.671875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-181", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7734375, |
| "scores": { |
| "A": -9.6328125, |
| "B": -8.859375, |
| "C": -11.828125, |
| "D": -11.640625, |
| "E": -10.6875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.625, |
| "scores": { |
| "A": -9.0, |
| "B": -8.375, |
| "C": -11.2734375, |
| "D": -11.1875, |
| "E": -10.2265625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-183", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.40625, |
| "scores": { |
| "A": -12.6484375, |
| "B": -9.3671875, |
| "C": -10.7734375, |
| "D": -13.140625, |
| "E": -13.125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.140625, |
| "scores": { |
| "A": -12.3125, |
| "B": -9.15625, |
| "C": -10.296875, |
| "D": -12.921875, |
| "E": -12.953125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-189", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2109375, |
| "scores": { |
| "A": -14.5390625, |
| "B": -11.546875, |
| "C": -11.8046875, |
| "D": -11.7578125, |
| "E": -13.34375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.3125, |
| "scores": { |
| "A": -14.015625, |
| "B": -11.3125, |
| "C": -11.625, |
| "D": -11.765625, |
| "E": -12.8671875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-190", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.34375, |
| "scores": { |
| "A": -13.6015625, |
| "B": -10.7734375, |
| "C": -10.4296875, |
| "D": -13.3671875, |
| "E": -13.9296875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.359375, |
| "scores": { |
| "A": -13.125, |
| "B": -10.3125, |
| "C": -9.953125, |
| "D": -13.0625, |
| "E": -13.9453125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-191", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953125, |
| "scores": { |
| "A": -11.890625, |
| "B": -10.9375, |
| "C": -13.640625, |
| "D": -14.109375, |
| "E": -13.765625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.859375, |
| "scores": { |
| "A": -11.03125, |
| "B": -10.171875, |
| "C": -12.625, |
| "D": -13.140625, |
| "E": -12.796875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-206", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.1796875, |
| "scores": { |
| "A": -11.6171875, |
| "B": -10.96875, |
| "C": -11.1484375, |
| "D": -12.84375, |
| "E": -14.0 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.390625, |
| "scores": { |
| "A": -11.2578125, |
| "B": -10.9609375, |
| "C": -10.5703125, |
| "D": -12.4375, |
| "E": -13.6015625 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-212", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.5078125, |
| "scores": { |
| "A": -11.59375, |
| "B": -9.609375, |
| "C": -11.1171875, |
| "D": -11.7421875, |
| "E": -12.8359375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.515625, |
| "scores": { |
| "A": -11.078125, |
| "B": -9.1875, |
| "C": -10.703125, |
| "D": -11.40625, |
| "E": -12.609375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-223", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.09375, |
| "scores": { |
| "A": -10.265625, |
| "B": -7.1484375, |
| "C": -10.2421875, |
| "D": -10.921875, |
| "E": -11.3359375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.0859375, |
| "scores": { |
| "A": -10.2109375, |
| "B": -6.890625, |
| "C": -9.9765625, |
| "D": -10.75, |
| "E": -11.4296875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-228", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.078125, |
| "scores": { |
| "A": -13.3125, |
| "B": -10.71875, |
| "C": -10.796875, |
| "D": -14.0859375, |
| "E": -14.8984375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.1640625, |
| "scores": { |
| "A": -12.7578125, |
| "B": -10.2578125, |
| "C": -10.421875, |
| "D": -13.53125, |
| "E": -14.1953125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-249", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.828125, |
| "scores": { |
| "A": -10.328125, |
| "B": -8.859375, |
| "C": -8.03125, |
| "D": -11.640625, |
| "E": -11.109375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.71875, |
| "scores": { |
| "A": -10.2578125, |
| "B": -8.7734375, |
| "C": -8.0546875, |
| "D": -11.65625, |
| "E": -10.9140625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-251", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.078125, |
| "scores": { |
| "A": -9.90625, |
| "B": -9.828125, |
| "C": -12.40625, |
| "D": -11.265625, |
| "E": -10.703125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.3671875, |
| "scores": { |
| "A": -8.8984375, |
| "B": -9.265625, |
| "C": -11.75, |
| "D": -10.6015625, |
| "E": -9.9921875 |
| } |
| }, |
| "flip": true |
| } |
| ], |
| "0.5": [ |
| { |
| "ex_id": "aqua-test-2", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2578125, |
| "scores": { |
| "A": -11.234375, |
| "B": -10.2109375, |
| "C": -13.171875, |
| "D": -12.4453125, |
| "E": -10.46875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6796875, |
| "scores": { |
| "A": -8.2890625, |
| "B": -7.609375, |
| "C": -10.546875, |
| "D": -11.421875, |
| "E": -9.6484375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-5", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953125, |
| "scores": { |
| "A": -11.9921875, |
| "B": -10.9765625, |
| "C": -12.0390625, |
| "D": -11.9609375, |
| "E": -11.9296875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.5, |
| "scores": { |
| "A": -9.90625, |
| "B": -9.3125, |
| "C": -9.8125, |
| "D": -10.203125, |
| "E": -10.078125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-9", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.078125, |
| "scores": { |
| "A": -11.265625, |
| "B": -8.890625, |
| "C": -9.96875, |
| "D": -12.359375, |
| "E": -13.9921875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.265625, |
| "scores": { |
| "A": -9.953125, |
| "B": -8.109375, |
| "C": -9.375, |
| "D": -11.640625, |
| "E": -13.875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-15", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.671875, |
| "scores": { |
| "A": -11.078125, |
| "B": -10.40625, |
| "C": -13.625, |
| "D": -15.3125, |
| "E": -13.8125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.125, |
| "scores": { |
| "A": -8.296875, |
| "B": -8.421875, |
| "C": -10.796875, |
| "D": -12.859375, |
| "E": -12.9375 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-16", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 2.80078125, |
| "scores": { |
| "A": -12.484375, |
| "B": -10.515625, |
| "C": -7.71484375, |
| "D": -12.859375, |
| "E": -12.8125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 3.44140625, |
| "scores": { |
| "A": -8.984375, |
| "B": -8.984375, |
| "C": -5.54296875, |
| "D": -9.96875, |
| "E": -10.765625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-21", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.765625, |
| "scores": { |
| "A": -10.4140625, |
| "B": -9.6484375, |
| "C": -12.5546875, |
| "D": -12.234375, |
| "E": -11.3828125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.3984375, |
| "scores": { |
| "A": -9.015625, |
| "B": -7.6171875, |
| "C": -11.046875, |
| "D": -11.390625, |
| "E": -10.84375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-25", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.0546875, |
| "scores": { |
| "A": -12.953125, |
| "B": -12.2578125, |
| "C": -12.203125, |
| "D": -12.4140625, |
| "E": -13.6328125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.140625, |
| "scores": { |
| "A": -10.734375, |
| "B": -9.5, |
| "C": -9.640625, |
| "D": -10.234375, |
| "E": -10.859375 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-33", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.1875, |
| "scores": { |
| "A": -17.28125, |
| "B": -18.1875, |
| "C": -16.09375, |
| "D": -19.15625, |
| "E": -19.46875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.453125, |
| "scores": { |
| "A": -11.296875, |
| "B": -10.015625, |
| "C": -8.5625, |
| "D": -11.7890625, |
| "E": -11.21875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-39", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.71875, |
| "scores": { |
| "A": -10.2265625, |
| "B": -11.9453125, |
| "C": -12.1484375, |
| "D": -14.3125, |
| "E": -14.015625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 0.9375, |
| "scores": { |
| "A": -8.546875, |
| "B": -9.484375, |
| "C": -9.75, |
| "D": -11.703125, |
| "E": -10.671875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-47", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.203125, |
| "scores": { |
| "A": -11.9453125, |
| "B": -12.5, |
| "C": -12.1171875, |
| "D": -13.046875, |
| "E": -11.7421875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.15625, |
| "scores": { |
| "A": -12.296875, |
| "B": -13.421875, |
| "C": -12.203125, |
| "D": -13.125, |
| "E": -12.359375 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-52", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.0625, |
| "scores": { |
| "A": -12.890625, |
| "B": -9.8515625, |
| "C": -9.9140625, |
| "D": -11.515625, |
| "E": -10.6875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.140625, |
| "scores": { |
| "A": -10.453125, |
| "B": -7.85546875, |
| "C": -7.71484375, |
| "D": -10.40625, |
| "E": -10.203125 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-57", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.15625, |
| "scores": { |
| "A": -13.875, |
| "B": -12.96875, |
| "C": -14.359375, |
| "D": -14.140625, |
| "E": -13.125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.3515625, |
| "scores": { |
| "A": -12.3203125, |
| "B": -11.6171875, |
| "C": -12.796875, |
| "D": -12.8359375, |
| "E": -11.96875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-68", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6875, |
| "scores": { |
| "A": -11.65625, |
| "B": -10.96875, |
| "C": -11.875, |
| "D": -12.078125, |
| "E": -12.640625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.578125, |
| "scores": { |
| "A": -10.34375, |
| "B": -9.65625, |
| "C": -10.234375, |
| "D": -11.578125, |
| "E": -11.234375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-78", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.078125, |
| "scores": { |
| "A": -12.7890625, |
| "B": -8.3203125, |
| "C": -11.3984375, |
| "D": -13.765625, |
| "E": -13.84375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.0625, |
| "scores": { |
| "A": -11.1640625, |
| "B": -7.296875, |
| "C": -10.359375, |
| "D": -12.5546875, |
| "E": -13.0703125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-87", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.625, |
| "scores": { |
| "A": -9.7890625, |
| "B": -9.1640625, |
| "C": -11.234375, |
| "D": -12.0, |
| "E": -11.46875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.171875, |
| "scores": { |
| "A": -8.96875, |
| "B": -8.796875, |
| "C": -10.609375, |
| "D": -11.2421875, |
| "E": -10.7421875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-100", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.1484375, |
| "scores": { |
| "A": -9.265625, |
| "B": -9.7265625, |
| "C": -9.1171875, |
| "D": -10.0546875, |
| "E": -10.6015625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.234375, |
| "scores": { |
| "A": -7.7421875, |
| "B": -8.1015625, |
| "C": -7.5078125, |
| "D": -8.6875, |
| "E": -9.671875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-103", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1484375, |
| "scores": { |
| "A": -9.734375, |
| "B": -8.5234375, |
| "C": -9.6875, |
| "D": -11.4375, |
| "E": -9.671875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.71875, |
| "scores": { |
| "A": -8.4921875, |
| "B": -7.7734375, |
| "C": -9.0703125, |
| "D": -11.109375, |
| "E": -9.640625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-105", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 3.03125, |
| "scores": { |
| "A": -11.5, |
| "B": -12.0234375, |
| "C": -8.46875, |
| "D": -13.9765625, |
| "E": -13.28125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 2.9140625, |
| "scores": { |
| "A": -9.4140625, |
| "B": -11.140625, |
| "C": -6.5, |
| "D": -13.234375, |
| "E": -12.96875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-111", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.203125, |
| "scores": { |
| "A": -9.796875, |
| "B": -9.2734375, |
| "C": -9.4765625, |
| "D": -10.7578125, |
| "E": -11.4296875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.3984375, |
| "scores": { |
| "A": -8.8828125, |
| "B": -8.9140625, |
| "C": -8.515625, |
| "D": -9.6953125, |
| "E": -10.2109375 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-116", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.7109375, |
| "scores": { |
| "A": -12.0390625, |
| "B": -9.7421875, |
| "C": -11.453125, |
| "D": -11.5390625, |
| "E": -11.8203125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.6875, |
| "scores": { |
| "A": -9.0234375, |
| "B": -7.3359375, |
| "C": -10.3359375, |
| "D": -10.328125, |
| "E": -10.328125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-120", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.3515625, |
| "scores": { |
| "A": -12.625, |
| "B": -10.171875, |
| "C": -10.5234375, |
| "D": -11.96875, |
| "E": -12.625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.015625, |
| "scores": { |
| "A": -10.9296875, |
| "B": -8.8203125, |
| "C": -8.8359375, |
| "D": -10.6171875, |
| "E": -11.7109375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-122", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.671875, |
| "scores": { |
| "A": -11.09375, |
| "B": -10.421875, |
| "C": -13.25, |
| "D": -13.296875, |
| "E": -13.5 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.296875, |
| "scores": { |
| "A": -9.921875, |
| "B": -9.625, |
| "C": -12.3203125, |
| "D": -12.46875, |
| "E": -13.0625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-123", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.9296875, |
| "scores": { |
| "A": -12.8125, |
| "B": -13.265625, |
| "C": -10.09375, |
| "D": -12.0234375, |
| "E": -12.84375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.0390625, |
| "scores": { |
| "A": -9.3984375, |
| "B": -10.296875, |
| "C": -8.359375, |
| "D": -9.796875, |
| "E": -10.875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-125", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.25, |
| "scores": { |
| "A": -12.8984375, |
| "B": -12.015625, |
| "C": -10.3671875, |
| "D": -10.6171875, |
| "E": -11.515625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.2109375, |
| "scores": { |
| "A": -9.1640625, |
| "B": -8.953125, |
| "C": -7.7421875, |
| "D": -9.046875, |
| "E": -9.6796875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-130", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.5625, |
| "scores": { |
| "A": -12.53125, |
| "B": -11.625, |
| "C": -14.40625, |
| "D": -11.0625, |
| "E": -12.203125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -0.578125, |
| "scores": { |
| "A": -9.3125, |
| "B": -8.5, |
| "C": -9.578125, |
| "D": -8.78125, |
| "E": -8.203125 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-140", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.09375, |
| "scores": { |
| "A": -12.171875, |
| "B": -10.953125, |
| "C": -12.484375, |
| "D": -12.046875, |
| "E": -12.828125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.03125, |
| "scores": { |
| "A": -9.25, |
| "B": -9.28125, |
| "C": -9.96875, |
| "D": -10.46875, |
| "E": -11.1875 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-141", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.390625, |
| "scores": { |
| "A": -15.65625, |
| "B": -14.0, |
| "C": -12.3359375, |
| "D": -12.7265625, |
| "E": -13.421875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.390625, |
| "scores": { |
| "A": -13.2265625, |
| "B": -11.828125, |
| "C": -10.125, |
| "D": -10.515625, |
| "E": -10.65625 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-148", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.09375, |
| "scores": { |
| "A": -11.84375, |
| "B": -8.6875, |
| "C": -9.390625, |
| "D": -8.59375, |
| "E": -10.328125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.1875, |
| "scores": { |
| "A": -11.15625, |
| "B": -8.8125, |
| "C": -8.5625, |
| "D": -8.375, |
| "E": -9.453125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-152", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.65625, |
| "scores": { |
| "A": -12.15625, |
| "B": -11.09375, |
| "C": -11.75, |
| "D": -11.765625, |
| "E": -11.75 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.78125, |
| "scores": { |
| "A": -9.078125, |
| "B": -9.859375, |
| "C": -10.28125, |
| "D": -9.53125, |
| "E": -9.421875 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-167", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 2.28125, |
| "scores": { |
| "A": -13.3125, |
| "B": -10.640625, |
| "C": -12.921875, |
| "D": -16.09375, |
| "E": -14.75 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.3203125, |
| "scores": { |
| "A": -10.1953125, |
| "B": -8.875, |
| "C": -10.5625, |
| "D": -12.84375, |
| "E": -12.578125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-178", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.4609375, |
| "scores": { |
| "A": -13.1953125, |
| "B": -11.1015625, |
| "C": -12.8203125, |
| "D": -12.625, |
| "E": -10.640625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.3671875, |
| "scores": { |
| "A": -10.4296875, |
| "B": -8.125, |
| "C": -8.296875, |
| "D": -10.5546875, |
| "E": -9.4921875 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-181", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7734375, |
| "scores": { |
| "A": -9.6328125, |
| "B": -8.859375, |
| "C": -11.828125, |
| "D": -11.640625, |
| "E": -10.6875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.28125, |
| "scores": { |
| "A": -8.0234375, |
| "B": -7.7421875, |
| "C": -9.6484375, |
| "D": -10.40625, |
| "E": -9.8203125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-183", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.40625, |
| "scores": { |
| "A": -12.6484375, |
| "B": -9.3671875, |
| "C": -10.7734375, |
| "D": -13.140625, |
| "E": -13.125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.8203125, |
| "scores": { |
| "A": -11.0703125, |
| "B": -8.109375, |
| "C": -8.9296875, |
| "D": -12.046875, |
| "E": -12.53125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-189", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2109375, |
| "scores": { |
| "A": -14.5390625, |
| "B": -11.546875, |
| "C": -11.8046875, |
| "D": -11.7578125, |
| "E": -13.34375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.4921875, |
| "scores": { |
| "A": -11.890625, |
| "B": -10.34375, |
| "C": -9.8515625, |
| "D": -10.515625, |
| "E": -10.8125 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-190", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.34375, |
| "scores": { |
| "A": -13.6015625, |
| "B": -10.7734375, |
| "C": -10.4296875, |
| "D": -13.3671875, |
| "E": -13.9296875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.4921875, |
| "scores": { |
| "A": -10.703125, |
| "B": -9.3515625, |
| "C": -8.859375, |
| "D": -11.4765625, |
| "E": -12.3984375 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-191", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953125, |
| "scores": { |
| "A": -11.890625, |
| "B": -10.9375, |
| "C": -13.640625, |
| "D": -14.109375, |
| "E": -13.765625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.03125, |
| "scores": { |
| "A": -8.28125, |
| "B": -8.3125, |
| "C": -10.6328125, |
| "D": -10.890625, |
| "E": -11.5234375 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-206", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.1796875, |
| "scores": { |
| "A": -11.6171875, |
| "B": -10.96875, |
| "C": -11.1484375, |
| "D": -12.84375, |
| "E": -14.0 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.109375, |
| "scores": { |
| "A": -10.09375, |
| "B": -9.984375, |
| "C": -10.5625, |
| "D": -11.59375, |
| "E": -13.203125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-212", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.5078125, |
| "scores": { |
| "A": -11.59375, |
| "B": -9.609375, |
| "C": -11.1171875, |
| "D": -11.7421875, |
| "E": -12.8359375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.359375, |
| "scores": { |
| "A": -9.8125, |
| "B": -8.453125, |
| "C": -10.125, |
| "D": -10.421875, |
| "E": -12.0 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-223", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.09375, |
| "scores": { |
| "A": -10.265625, |
| "B": -7.1484375, |
| "C": -10.2421875, |
| "D": -10.921875, |
| "E": -11.3359375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 2.53125, |
| "scores": { |
| "A": -10.0625, |
| "B": -7.328125, |
| "C": -9.859375, |
| "D": -10.203125, |
| "E": -11.421875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-228", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.078125, |
| "scores": { |
| "A": -13.3125, |
| "B": -10.71875, |
| "C": -10.796875, |
| "D": -14.0859375, |
| "E": -14.8984375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.484375, |
| "scores": { |
| "A": -10.3125, |
| "B": -7.9140625, |
| "C": -8.3984375, |
| "D": -11.421875, |
| "E": -11.21875 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-249", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.828125, |
| "scores": { |
| "A": -10.328125, |
| "B": -8.859375, |
| "C": -8.03125, |
| "D": -11.640625, |
| "E": -11.109375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.03125, |
| "scores": { |
| "A": -9.078125, |
| "B": -7.50390625, |
| "C": -7.47265625, |
| "D": -10.890625, |
| "E": -9.953125 |
| } |
| }, |
| "flip": false |
| }, |
| { |
| "ex_id": "aqua-test-251", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.078125, |
| "scores": { |
| "A": -9.90625, |
| "B": -9.828125, |
| "C": -12.40625, |
| "D": -11.265625, |
| "E": -10.703125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.390625, |
| "scores": { |
| "A": -8.671875, |
| "B": -9.0625, |
| "C": -11.2734375, |
| "D": -10.6015625, |
| "E": -9.640625 |
| } |
| }, |
| "flip": true |
| } |
| ], |
| "1.0": [ |
| { |
| "ex_id": "aqua-test-2", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2578125, |
| "scores": { |
| "A": -11.234375, |
| "B": -10.2109375, |
| "C": -13.171875, |
| "D": -12.4453125, |
| "E": -10.46875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.953125, |
| "scores": { |
| "A": -6.0625, |
| "B": -14.015625, |
| "C": -17.125, |
| "D": -15.2734375, |
| "E": -15.640625 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-5", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953125, |
| "scores": { |
| "A": -11.9921875, |
| "B": -10.9765625, |
| "C": -12.0390625, |
| "D": -11.9609375, |
| "E": -11.9296875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.1640625, |
| "scores": { |
| "A": -7.59375, |
| "B": -9.7578125, |
| "C": -11.0234375, |
| "D": -9.1953125, |
| "E": -10.0625 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-9", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.078125, |
| "scores": { |
| "A": -11.265625, |
| "B": -8.890625, |
| "C": -9.96875, |
| "D": -12.359375, |
| "E": -13.9921875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.01953125, |
| "scores": { |
| "A": -7.32421875, |
| "B": -11.34375, |
| "C": -11.5, |
| "D": -13.6875, |
| "E": -15.28125 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-15", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.671875, |
| "scores": { |
| "A": -11.078125, |
| "B": -10.40625, |
| "C": -13.625, |
| "D": -15.3125, |
| "E": -13.8125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.7421875, |
| "scores": { |
| "A": -6.1171875, |
| "B": -10.859375, |
| "C": -11.296875, |
| "D": -11.0625, |
| "E": -13.578125 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-16", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 2.80078125, |
| "scores": { |
| "A": -12.484375, |
| "B": -10.515625, |
| "C": -7.71484375, |
| "D": -12.859375, |
| "E": -12.8125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.87109375, |
| "scores": { |
| "A": -7.82421875, |
| "B": -9.453125, |
| "C": -8.6953125, |
| "D": -9.59375, |
| "E": -11.6953125 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-21", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.765625, |
| "scores": { |
| "A": -10.4140625, |
| "B": -9.6484375, |
| "C": -12.5546875, |
| "D": -12.234375, |
| "E": -11.3828125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.5390625, |
| "scores": { |
| "A": -6.1328125, |
| "B": -9.671875, |
| "C": -10.8046875, |
| "D": -10.671875, |
| "E": -11.2109375 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-25", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.0546875, |
| "scores": { |
| "A": -12.953125, |
| "B": -12.2578125, |
| "C": -12.203125, |
| "D": -12.4140625, |
| "E": -13.6328125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.5625, |
| "scores": { |
| "A": -8.6484375, |
| "B": -10.8828125, |
| "C": -10.2109375, |
| "D": -9.4609375, |
| "E": -10.859375 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-33", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.1875, |
| "scores": { |
| "A": -17.28125, |
| "B": -18.1875, |
| "C": -16.09375, |
| "D": -19.15625, |
| "E": -19.46875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.4375, |
| "scores": { |
| "A": -9.125, |
| "B": -10.1171875, |
| "C": -9.5625, |
| "D": -10.0703125, |
| "E": -10.4921875 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-39", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.71875, |
| "scores": { |
| "A": -10.2265625, |
| "B": -11.9453125, |
| "C": -12.1484375, |
| "D": -14.3125, |
| "E": -14.015625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.390625, |
| "scores": { |
| "A": -10.234375, |
| "B": -10.1875, |
| "C": -9.84375, |
| "D": -11.59375, |
| "E": -10.8515625 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-47", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.203125, |
| "scores": { |
| "A": -11.9453125, |
| "B": -12.5, |
| "C": -12.1171875, |
| "D": -13.046875, |
| "E": -11.7421875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.0390625, |
| "scores": { |
| "A": -9.3828125, |
| "B": -11.8515625, |
| "C": -13.359375, |
| "D": -12.15625, |
| "E": -13.421875 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-52", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.0625, |
| "scores": { |
| "A": -12.890625, |
| "B": -9.8515625, |
| "C": -9.9140625, |
| "D": -11.515625, |
| "E": -10.6875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.4765625, |
| "scores": { |
| "A": -5.1328125, |
| "B": -5.609375, |
| "C": -6.609375, |
| "D": -6.8984375, |
| "E": -6.4296875 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-57", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.15625, |
| "scores": { |
| "A": -13.875, |
| "B": -12.96875, |
| "C": -14.359375, |
| "D": -14.140625, |
| "E": -13.125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.640625, |
| "scores": { |
| "A": -10.3125, |
| "B": -12.953125, |
| "C": -12.8203125, |
| "D": -12.8359375, |
| "E": -12.5078125 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-68", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6875, |
| "scores": { |
| "A": -11.65625, |
| "B": -10.96875, |
| "C": -11.875, |
| "D": -12.078125, |
| "E": -12.640625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.1953125, |
| "scores": { |
| "A": -9.03125, |
| "B": -11.2265625, |
| "C": -11.265625, |
| "D": -11.5234375, |
| "E": -10.6171875 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-78", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.078125, |
| "scores": { |
| "A": -12.7890625, |
| "B": -8.3203125, |
| "C": -11.3984375, |
| "D": -13.765625, |
| "E": -13.84375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.63671875, |
| "scores": { |
| "A": -5.62890625, |
| "B": -10.265625, |
| "C": -11.3125, |
| "D": -12.078125, |
| "E": -12.4296875 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-87", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.625, |
| "scores": { |
| "A": -9.7890625, |
| "B": -9.1640625, |
| "C": -11.234375, |
| "D": -12.0, |
| "E": -11.46875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.90625, |
| "scores": { |
| "A": -6.578125, |
| "B": -9.484375, |
| "C": -9.703125, |
| "D": -9.1875, |
| "E": -11.25 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-100", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.1484375, |
| "scores": { |
| "A": -9.265625, |
| "B": -9.7265625, |
| "C": -9.1171875, |
| "D": -10.0546875, |
| "E": -10.6015625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.16015625, |
| "scores": { |
| "A": -4.76171875, |
| "B": -9.390625, |
| "C": -10.921875, |
| "D": -11.46875, |
| "E": -13.1875 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-103", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1484375, |
| "scores": { |
| "A": -9.734375, |
| "B": -8.5234375, |
| "C": -9.6875, |
| "D": -11.4375, |
| "E": -9.671875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.75, |
| "scores": { |
| "A": -7.84375, |
| "B": -8.59375, |
| "C": -10.53125, |
| "D": -9.78125, |
| "E": -8.421875 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-105", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 3.03125, |
| "scores": { |
| "A": -11.5, |
| "B": -12.0234375, |
| "C": -8.46875, |
| "D": -13.9765625, |
| "E": -13.28125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.12109375, |
| "scores": { |
| "A": -7.45703125, |
| "B": -9.78125, |
| "C": -8.578125, |
| "D": -13.375, |
| "E": -13.59375 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-111", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.203125, |
| "scores": { |
| "A": -9.796875, |
| "B": -9.2734375, |
| "C": -9.4765625, |
| "D": -10.7578125, |
| "E": -11.4296875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.6328125, |
| "scores": { |
| "A": -8.4140625, |
| "B": -12.046875, |
| "C": -12.484375, |
| "D": -13.3125, |
| "E": -14.53125 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-116", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.7109375, |
| "scores": { |
| "A": -12.0390625, |
| "B": -9.7421875, |
| "C": -11.453125, |
| "D": -11.5390625, |
| "E": -11.8203125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.51171875, |
| "scores": { |
| "A": -7.28125, |
| "B": -7.4921875, |
| "C": -8.8203125, |
| "D": -6.98046875, |
| "E": -7.3046875 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-120", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.3515625, |
| "scores": { |
| "A": -12.625, |
| "B": -10.171875, |
| "C": -10.5234375, |
| "D": -11.96875, |
| "E": -12.625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.84375, |
| "scores": { |
| "A": -7.4375, |
| "B": -12.28125, |
| "C": -11.75, |
| "D": -11.984375, |
| "E": -13.75 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-122", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.671875, |
| "scores": { |
| "A": -11.09375, |
| "B": -10.421875, |
| "C": -13.25, |
| "D": -13.296875, |
| "E": -13.5 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.328125, |
| "scores": { |
| "A": -6.90625, |
| "B": -9.234375, |
| "C": -12.421875, |
| "D": -11.0, |
| "E": -12.1796875 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-123", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.9296875, |
| "scores": { |
| "A": -12.8125, |
| "B": -13.265625, |
| "C": -10.09375, |
| "D": -12.0234375, |
| "E": -12.84375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.0625, |
| "scores": { |
| "A": -7.71875, |
| "B": -10.9296875, |
| "C": -11.78125, |
| "D": -11.5546875, |
| "E": -13.9375 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-125", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.25, |
| "scores": { |
| "A": -12.8984375, |
| "B": -12.015625, |
| "C": -10.3671875, |
| "D": -10.6171875, |
| "E": -11.515625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.16796875, |
| "scores": { |
| "A": -6.90234375, |
| "B": -8.6875, |
| "C": -10.0703125, |
| "D": -9.84375, |
| "E": -10.1640625 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-130", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.5625, |
| "scores": { |
| "A": -12.53125, |
| "B": -11.625, |
| "C": -14.40625, |
| "D": -11.0625, |
| "E": -12.203125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.62890625, |
| "scores": { |
| "A": -7.70703125, |
| "B": -7.73046875, |
| "C": -10.4296875, |
| "D": -8.3359375, |
| "E": -8.21875 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-140", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.09375, |
| "scores": { |
| "A": -12.171875, |
| "B": -10.953125, |
| "C": -12.484375, |
| "D": -12.046875, |
| "E": -12.828125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.09375, |
| "scores": { |
| "A": -8.640625, |
| "B": -9.734375, |
| "C": -10.828125, |
| "D": -10.234375, |
| "E": -11.625 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-141", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.390625, |
| "scores": { |
| "A": -15.65625, |
| "B": -14.0, |
| "C": -12.3359375, |
| "D": -12.7265625, |
| "E": -13.421875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -1.5078125, |
| "scores": { |
| "A": -9.03125, |
| "B": -10.1640625, |
| "C": -9.59375, |
| "D": -8.109375, |
| "E": -8.0859375 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-148", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.09375, |
| "scores": { |
| "A": -11.84375, |
| "B": -8.6875, |
| "C": -9.390625, |
| "D": -8.59375, |
| "E": -10.328125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.2890625, |
| "scores": { |
| "A": -8.21875, |
| "B": -9.21875, |
| "C": -9.4140625, |
| "D": -8.5078125, |
| "E": -9.5859375 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-152", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.65625, |
| "scores": { |
| "A": -12.15625, |
| "B": -11.09375, |
| "C": -11.75, |
| "D": -11.765625, |
| "E": -11.75 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.6015625, |
| "scores": { |
| "A": -8.2734375, |
| "B": -12.875, |
| "C": -16.15625, |
| "D": -13.109375, |
| "E": -13.90625 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-167", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 2.28125, |
| "scores": { |
| "A": -13.3125, |
| "B": -10.640625, |
| "C": -12.921875, |
| "D": -16.09375, |
| "E": -14.75 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.328125, |
| "scores": { |
| "A": -8.015625, |
| "B": -11.34375, |
| "C": -15.125, |
| "D": -13.3125, |
| "E": -14.375 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-178", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.4609375, |
| "scores": { |
| "A": -13.1953125, |
| "B": -11.1015625, |
| "C": -12.8203125, |
| "D": -12.625, |
| "E": -10.640625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.72265625, |
| "scores": { |
| "A": -5.77734375, |
| "B": -10.375, |
| "C": -8.5, |
| "D": -10.84375, |
| "E": -12.5 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-181", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7734375, |
| "scores": { |
| "A": -9.6328125, |
| "B": -8.859375, |
| "C": -11.828125, |
| "D": -11.640625, |
| "E": -10.6875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.5078125, |
| "scores": { |
| "A": -7.5546875, |
| "B": -9.0625, |
| "C": -10.4453125, |
| "D": -9.140625, |
| "E": -9.0078125 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-183", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.40625, |
| "scores": { |
| "A": -12.6484375, |
| "B": -9.3671875, |
| "C": -10.7734375, |
| "D": -13.140625, |
| "E": -13.125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.71875, |
| "scores": { |
| "A": -8.6640625, |
| "B": -7.5078125, |
| "C": -6.7890625, |
| "D": -9.3046875, |
| "E": -10.7578125 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-189", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2109375, |
| "scores": { |
| "A": -14.5390625, |
| "B": -11.546875, |
| "C": -11.8046875, |
| "D": -11.7578125, |
| "E": -13.34375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.84375, |
| "scores": { |
| "A": -9.875, |
| "B": -10.3828125, |
| "C": -10.28125, |
| "D": -9.5390625, |
| "E": -9.5625 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-190", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.34375, |
| "scores": { |
| "A": -13.6015625, |
| "B": -10.7734375, |
| "C": -10.4296875, |
| "D": -13.3671875, |
| "E": -13.9296875 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.5, |
| "scores": { |
| "A": -6.3515625, |
| "B": -9.2578125, |
| "C": -12.8515625, |
| "D": -10.84375, |
| "E": -12.953125 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-191", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953125, |
| "scores": { |
| "A": -11.890625, |
| "B": -10.9375, |
| "C": -13.640625, |
| "D": -14.109375, |
| "E": -13.765625 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.09375, |
| "scores": { |
| "A": -6.4375, |
| "B": -8.53125, |
| "C": -11.7734375, |
| "D": -11.28125, |
| "E": -12.3203125 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-206", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.1796875, |
| "scores": { |
| "A": -11.6171875, |
| "B": -10.96875, |
| "C": -11.1484375, |
| "D": -12.84375, |
| "E": -14.0 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.03125, |
| "scores": { |
| "A": -8.0, |
| "B": -11.03125, |
| "C": -12.328125, |
| "D": -11.765625, |
| "E": -14.171875 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-212", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.5078125, |
| "scores": { |
| "A": -11.59375, |
| "B": -9.609375, |
| "C": -11.1171875, |
| "D": -11.7421875, |
| "E": -12.8359375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.390625, |
| "scores": { |
| "A": -4.984375, |
| "B": -8.375, |
| "C": -11.28125, |
| "D": -9.4140625, |
| "E": -11.890625 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-223", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.09375, |
| "scores": { |
| "A": -10.265625, |
| "B": -7.1484375, |
| "C": -10.2421875, |
| "D": -10.921875, |
| "E": -11.3359375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.09765625, |
| "scores": { |
| "A": -4.67578125, |
| "B": -8.7734375, |
| "C": -12.375, |
| "D": -11.1796875, |
| "E": -13.109375 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-228", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.078125, |
| "scores": { |
| "A": -13.3125, |
| "B": -10.71875, |
| "C": -10.796875, |
| "D": -14.0859375, |
| "E": -14.8984375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.28125, |
| "scores": { |
| "A": -7.171875, |
| "B": -8.453125, |
| "C": -9.515625, |
| "D": -10.125, |
| "E": -8.953125 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-249", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.828125, |
| "scores": { |
| "A": -10.328125, |
| "B": -8.859375, |
| "C": -8.03125, |
| "D": -11.640625, |
| "E": -11.109375 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.59375, |
| "scores": { |
| "A": -5.9921875, |
| "B": -7.609375, |
| "C": -8.5859375, |
| "D": -8.4375, |
| "E": -8.6796875 |
| } |
| }, |
| "flip": true |
| }, |
| { |
| "ex_id": "aqua-test-251", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.078125, |
| "scores": { |
| "A": -9.90625, |
| "B": -9.828125, |
| "C": -12.40625, |
| "D": -11.265625, |
| "E": -10.703125 |
| } |
| }, |
| "ablated": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.984375, |
| "scores": { |
| "A": -9.296875, |
| "B": -12.28125, |
| "C": -13.5, |
| "D": -12.09375, |
| "E": -11.140625 |
| } |
| }, |
| "flip": true |
| } |
| ] |
| }, |
| "donors_meta": [ |
| { |
| "n_donor_bank": 192, |
| "donor_source": "cross_task_eval", |
| "donor_tasks": [ |
| "gsm8k", |
| "commonsenseqa", |
| "strategyqa" |
| ], |
| "donor_n_eval": 64, |
| "donor_pick": "cyclic", |
| "donor_require_gold_in_candidates": false, |
| "donor_require_baseline_correct": false |
| } |
| ], |
| "transfer_patching_summary_on_flipset": { |
| "patched_transfer": { |
| "n": 42, |
| "rescued": 33, |
| "rescued_pct": 78.57142857142857, |
| "mean_delta_margin_vs_ablated": 3.4120163917541504, |
| "median_delta_margin_vs_ablated": 3.212890625 |
| }, |
| "patched_self": { |
| "n": 42, |
| "rescued": 31, |
| "rescued_pct": 73.80952380952381, |
| "mean_delta_margin_vs_ablated": 3.3116629123687744, |
| "median_delta_margin_vs_ablated": 3.0859375 |
| } |
| }, |
| "transfer_patching_rows": [ |
| { |
| "ex_id": "aqua-test-2", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2578125, |
| "scores": { |
| "A": -11.234375, |
| "B": -10.2109375, |
| "C": -13.171875, |
| "D": -12.4453125, |
| "E": -10.46875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -7.953125, |
| "scores": { |
| "A": -6.0625, |
| "B": -14.015625, |
| "C": -17.125, |
| "D": -15.2734375, |
| "E": -15.640625 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.7109375, |
| "scores": { |
| "A": -9.8203125, |
| "B": -7.5546875, |
| "C": -9.265625, |
| "D": -10.609375, |
| "E": -10.28125 |
| } |
| }, |
| "transfer_donor_ex_id": "strategyqa-test-19", |
| "transfer_donor_gold": "NO", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.3828125, |
| "scores": { |
| "A": -9.859375, |
| "B": -8.4765625, |
| "C": -10.296875, |
| "D": -10.875, |
| "E": -10.703125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-5", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953125, |
| "scores": { |
| "A": -11.9921875, |
| "B": -10.9765625, |
| "C": -12.0390625, |
| "D": -11.9609375, |
| "E": -11.9296875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.1640625, |
| "scores": { |
| "A": -7.59375, |
| "B": -9.7578125, |
| "C": -11.0234375, |
| "D": -9.1953125, |
| "E": -10.0625 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.1796875, |
| "scores": { |
| "A": -8.765625, |
| "B": -7.5, |
| "C": -7.6796875, |
| "D": -8.9609375, |
| "E": -9.078125 |
| } |
| }, |
| "transfer_donor_ex_id": "strategyqa-test-12", |
| "transfer_donor_gold": "YES", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.15625, |
| "scores": { |
| "A": -9.109375, |
| "B": -8.265625, |
| "C": -8.421875, |
| "D": -9.328125, |
| "E": -9.609375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-9", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.078125, |
| "scores": { |
| "A": -11.265625, |
| "B": -8.890625, |
| "C": -9.96875, |
| "D": -12.359375, |
| "E": -13.9921875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.01953125, |
| "scores": { |
| "A": -7.32421875, |
| "B": -11.34375, |
| "C": -11.5, |
| "D": -13.6875, |
| "E": -15.28125 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.9921875, |
| "scores": { |
| "A": -10.1015625, |
| "B": -6.890625, |
| "C": -7.8828125, |
| "D": -10.5546875, |
| "E": -11.984375 |
| } |
| }, |
| "transfer_donor_ex_id": "strategyqa-test-39", |
| "transfer_donor_gold": "YES", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.109375, |
| "scores": { |
| "A": -10.0625, |
| "B": -7.453125, |
| "C": -8.5625, |
| "D": -10.875, |
| "E": -12.3671875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-15", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.671875, |
| "scores": { |
| "A": -11.078125, |
| "B": -10.40625, |
| "C": -13.625, |
| "D": -15.3125, |
| "E": -13.8125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.7421875, |
| "scores": { |
| "A": -6.1171875, |
| "B": -10.859375, |
| "C": -11.296875, |
| "D": -11.0625, |
| "E": -13.578125 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.24609375, |
| "scores": { |
| "A": -8.53125, |
| "B": -7.28515625, |
| "C": -9.3984375, |
| "D": -11.59375, |
| "E": -11.796875 |
| } |
| }, |
| "transfer_donor_ex_id": "strategyqa-test-18", |
| "transfer_donor_gold": "YES", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7578125, |
| "scores": { |
| "A": -9.328125, |
| "B": -8.5703125, |
| "C": -10.296875, |
| "D": -11.6328125, |
| "E": -11.7734375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-16", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 2.80078125, |
| "scores": { |
| "A": -12.484375, |
| "B": -10.515625, |
| "C": -7.71484375, |
| "D": -12.859375, |
| "E": -12.8125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.87109375, |
| "scores": { |
| "A": -7.82421875, |
| "B": -9.453125, |
| "C": -8.6953125, |
| "D": -9.59375, |
| "E": -11.6953125 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.41796875, |
| "scores": { |
| "A": -10.203125, |
| "B": -9.015625, |
| "C": -7.59765625, |
| "D": -9.90625, |
| "E": -11.15625 |
| } |
| }, |
| "transfer_donor_ex_id": "commonsenseqa-validation-11", |
| "transfer_donor_gold": "E", |
| "patched_self": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.3984375, |
| "scores": { |
| "A": -10.015625, |
| "B": -9.453125, |
| "C": -8.0546875, |
| "D": -10.046875, |
| "E": -11.390625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-21", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.765625, |
| "scores": { |
| "A": -10.4140625, |
| "B": -9.6484375, |
| "C": -12.5546875, |
| "D": -12.234375, |
| "E": -11.3828125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.5390625, |
| "scores": { |
| "A": -6.1328125, |
| "B": -9.671875, |
| "C": -10.8046875, |
| "D": -10.671875, |
| "E": -11.2109375 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.73046875, |
| "scores": { |
| "A": -10.0, |
| "B": -7.85546875, |
| "C": -9.5859375, |
| "D": -10.15625, |
| "E": -10.53125 |
| } |
| }, |
| "transfer_donor_ex_id": "strategyqa-test-47", |
| "transfer_donor_gold": "NO", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.625, |
| "scores": { |
| "A": -10.0078125, |
| "B": -8.3828125, |
| "C": -10.2265625, |
| "D": -10.3984375, |
| "E": -11.0625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-25", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.0546875, |
| "scores": { |
| "A": -12.953125, |
| "B": -12.2578125, |
| "C": -12.203125, |
| "D": -12.4140625, |
| "E": -13.6328125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.5625, |
| "scores": { |
| "A": -8.6484375, |
| "B": -10.8828125, |
| "C": -10.2109375, |
| "D": -9.4609375, |
| "E": -10.859375 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.203125, |
| "scores": { |
| "A": -9.4140625, |
| "B": -7.7890625, |
| "C": -7.9921875, |
| "D": -9.34375, |
| "E": -10.2578125 |
| } |
| }, |
| "transfer_donor_ex_id": "strategyqa-test-26", |
| "transfer_donor_gold": "NO", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.171875, |
| "scores": { |
| "A": -9.7890625, |
| "B": -8.2734375, |
| "C": -8.4453125, |
| "D": -9.390625, |
| "E": -10.4609375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-33", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.1875, |
| "scores": { |
| "A": -17.28125, |
| "B": -18.1875, |
| "C": -16.09375, |
| "D": -19.15625, |
| "E": -19.46875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.4375, |
| "scores": { |
| "A": -9.125, |
| "B": -10.1171875, |
| "C": -9.5625, |
| "D": -10.0703125, |
| "E": -10.4921875 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.8828125, |
| "scores": { |
| "A": -10.7890625, |
| "B": -8.8828125, |
| "C": -8.0, |
| "D": -9.9453125, |
| "E": -10.921875 |
| } |
| }, |
| "transfer_donor_ex_id": "strategyqa-test-30", |
| "transfer_donor_gold": "YES", |
| "patched_self": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.9375, |
| "scores": { |
| "A": -10.890625, |
| "B": -9.453125, |
| "C": -8.515625, |
| "D": -10.0625, |
| "E": -11.015625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-39", |
| "gold": "A", |
| "baseline": { |
| "pred_label": "A", |
| "correct": true, |
| "margin": 1.71875, |
| "scores": { |
| "A": -10.2265625, |
| "B": -11.9453125, |
| "C": -12.1484375, |
| "D": -14.3125, |
| "E": -14.015625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.390625, |
| "scores": { |
| "A": -10.234375, |
| "B": -10.1875, |
| "C": -9.84375, |
| "D": -11.59375, |
| "E": -10.8515625 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.0078125, |
| "scores": { |
| "A": -9.1953125, |
| "B": -9.7578125, |
| "C": -9.1875, |
| "D": -10.234375, |
| "E": -9.25 |
| } |
| }, |
| "transfer_donor_ex_id": "gsm8k-test-60", |
| "transfer_donor_gold": "100", |
| "patched_self": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.265625, |
| "scores": { |
| "A": -9.4375, |
| "B": -9.75, |
| "C": -9.171875, |
| "D": -10.28125, |
| "E": -9.4609375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-47", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.203125, |
| "scores": { |
| "A": -11.9453125, |
| "B": -12.5, |
| "C": -12.1171875, |
| "D": -13.046875, |
| "E": -11.7421875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.0390625, |
| "scores": { |
| "A": -9.3828125, |
| "B": -11.8515625, |
| "C": -13.359375, |
| "D": -12.15625, |
| "E": -13.421875 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -1.03125, |
| "scores": { |
| "A": -10.859375, |
| "B": -11.03125, |
| "C": -9.9375, |
| "D": -11.546875, |
| "E": -10.96875 |
| } |
| }, |
| "transfer_donor_ex_id": "gsm8k-test-10", |
| "transfer_donor_gold": "2", |
| "patched_self": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.8125, |
| "scores": { |
| "A": -12.0, |
| "B": -12.4375, |
| "C": -11.296875, |
| "D": -12.40625, |
| "E": -12.109375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-52", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.0625, |
| "scores": { |
| "A": -12.890625, |
| "B": -9.8515625, |
| "C": -9.9140625, |
| "D": -11.515625, |
| "E": -10.6875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.4765625, |
| "scores": { |
| "A": -5.1328125, |
| "B": -5.609375, |
| "C": -6.609375, |
| "D": -6.8984375, |
| "E": -6.4296875 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.078125, |
| "scores": { |
| "A": -10.609375, |
| "B": -6.8203125, |
| "C": -6.8984375, |
| "D": -9.625, |
| "E": -9.734375 |
| } |
| }, |
| "transfer_donor_ex_id": "commonsenseqa-validation-60", |
| "transfer_donor_gold": "C", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.21875, |
| "scores": { |
| "A": -10.234375, |
| "B": -6.859375, |
| "C": -7.078125, |
| "D": -9.6015625, |
| "E": -9.796875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-57", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.15625, |
| "scores": { |
| "A": -13.875, |
| "B": -12.96875, |
| "C": -14.359375, |
| "D": -14.140625, |
| "E": -13.125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.640625, |
| "scores": { |
| "A": -10.3125, |
| "B": -12.953125, |
| "C": -12.8203125, |
| "D": -12.8359375, |
| "E": -12.5078125 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.5546875, |
| "scores": { |
| "A": -12.40625, |
| "B": -11.78125, |
| "C": -12.84375, |
| "D": -13.421875, |
| "E": -12.3359375 |
| } |
| }, |
| "transfer_donor_ex_id": "gsm8k-test-25", |
| "transfer_donor_gold": "6", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.3515625, |
| "scores": { |
| "A": -12.84375, |
| "B": -12.4296875, |
| "C": -13.4375, |
| "D": -13.765625, |
| "E": -12.78125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-68", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.6875, |
| "scores": { |
| "A": -11.65625, |
| "B": -10.96875, |
| "C": -11.875, |
| "D": -12.078125, |
| "E": -12.640625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.1953125, |
| "scores": { |
| "A": -9.03125, |
| "B": -11.2265625, |
| "C": -11.265625, |
| "D": -11.5234375, |
| "E": -10.6171875 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.265625, |
| "scores": { |
| "A": -10.140625, |
| "B": -8.859375, |
| "C": -9.125, |
| "D": -10.7578125, |
| "E": -10.1015625 |
| } |
| }, |
| "transfer_donor_ex_id": "strategyqa-test-5", |
| "transfer_donor_gold": "YES", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.046875, |
| "scores": { |
| "A": -10.1796875, |
| "B": -9.7734375, |
| "C": -9.8203125, |
| "D": -11.078125, |
| "E": -10.5625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-78", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.078125, |
| "scores": { |
| "A": -12.7890625, |
| "B": -8.3203125, |
| "C": -11.3984375, |
| "D": -13.765625, |
| "E": -13.84375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.63671875, |
| "scores": { |
| "A": -5.62890625, |
| "B": -10.265625, |
| "C": -11.3125, |
| "D": -12.078125, |
| "E": -12.4296875 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.9921875, |
| "scores": { |
| "A": -10.28125, |
| "B": -7.109375, |
| "C": -9.1015625, |
| "D": -11.7890625, |
| "E": -11.53125 |
| } |
| }, |
| "transfer_donor_ex_id": "gsm8k-test-32", |
| "transfer_donor_gold": "11232", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.671875, |
| "scores": { |
| "A": -10.6796875, |
| "B": -7.3671875, |
| "C": -9.0390625, |
| "D": -11.8359375, |
| "E": -11.515625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-87", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.625, |
| "scores": { |
| "A": -9.7890625, |
| "B": -9.1640625, |
| "C": -11.234375, |
| "D": -12.0, |
| "E": -11.46875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.90625, |
| "scores": { |
| "A": -6.578125, |
| "B": -9.484375, |
| "C": -9.703125, |
| "D": -9.1875, |
| "E": -11.25 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.578125, |
| "scores": { |
| "A": -9.5234375, |
| "B": -7.9453125, |
| "C": -10.046875, |
| "D": -10.640625, |
| "E": -10.7578125 |
| } |
| }, |
| "transfer_donor_ex_id": "commonsenseqa-validation-13", |
| "transfer_donor_gold": "A", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.4296875, |
| "scores": { |
| "A": -9.921875, |
| "B": -8.4921875, |
| "C": -10.4140625, |
| "D": -10.9453125, |
| "E": -11.1328125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-100", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.1484375, |
| "scores": { |
| "A": -9.265625, |
| "B": -9.7265625, |
| "C": -9.1171875, |
| "D": -10.0546875, |
| "E": -10.6015625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.16015625, |
| "scores": { |
| "A": -4.76171875, |
| "B": -9.390625, |
| "C": -10.921875, |
| "D": -11.46875, |
| "E": -13.1875 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.5546875, |
| "scores": { |
| "A": -7.515625, |
| "B": -7.2734375, |
| "C": -6.71875, |
| "D": -8.953125, |
| "E": -9.90625 |
| } |
| }, |
| "transfer_donor_ex_id": "strategyqa-test-52", |
| "transfer_donor_gold": "NO", |
| "patched_self": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.421875, |
| "scores": { |
| "A": -8.2421875, |
| "B": -8.515625, |
| "C": -7.8203125, |
| "D": -9.5078125, |
| "E": -10.5859375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-103", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1484375, |
| "scores": { |
| "A": -9.734375, |
| "B": -8.5234375, |
| "C": -9.6875, |
| "D": -11.4375, |
| "E": -9.671875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.75, |
| "scores": { |
| "A": -7.84375, |
| "B": -8.59375, |
| "C": -10.53125, |
| "D": -9.78125, |
| "E": -8.421875 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.296875, |
| "scores": { |
| "A": -8.8125, |
| "B": -7.390625, |
| "C": -8.859375, |
| "D": -10.28125, |
| "E": -8.6875 |
| } |
| }, |
| "transfer_donor_ex_id": "gsm8k-test-22", |
| "transfer_donor_gold": "18", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.27734375, |
| "scores": { |
| "A": -8.953125, |
| "B": -7.44140625, |
| "C": -8.75, |
| "D": -10.265625, |
| "E": -8.71875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-105", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 3.03125, |
| "scores": { |
| "A": -11.5, |
| "B": -12.0234375, |
| "C": -8.46875, |
| "D": -13.9765625, |
| "E": -13.28125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.12109375, |
| "scores": { |
| "A": -7.45703125, |
| "B": -9.78125, |
| "C": -8.578125, |
| "D": -13.375, |
| "E": -13.59375 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 2.23046875, |
| "scores": { |
| "A": -10.125, |
| "B": -9.671875, |
| "C": -7.44140625, |
| "D": -12.0625, |
| "E": -12.125 |
| } |
| }, |
| "transfer_donor_ex_id": "gsm8k-test-9", |
| "transfer_donor_gold": "2", |
| "patched_self": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 2.203125, |
| "scores": { |
| "A": -10.34375, |
| "B": -10.25, |
| "C": -8.046875, |
| "D": -12.796875, |
| "E": -12.75 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-111", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.203125, |
| "scores": { |
| "A": -9.796875, |
| "B": -9.2734375, |
| "C": -9.4765625, |
| "D": -10.7578125, |
| "E": -11.4296875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.6328125, |
| "scores": { |
| "A": -8.4140625, |
| "B": -12.046875, |
| "C": -12.484375, |
| "D": -13.3125, |
| "E": -14.53125 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.171875, |
| "scores": { |
| "A": -8.796875, |
| "B": -7.1796875, |
| "C": -7.3515625, |
| "D": -9.28125, |
| "E": -8.6640625 |
| } |
| }, |
| "transfer_donor_ex_id": "strategyqa-test-8", |
| "transfer_donor_gold": "YES", |
| "patched_self": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.0234375, |
| "scores": { |
| "A": -9.125, |
| "B": -8.1484375, |
| "C": -8.125, |
| "D": -9.4453125, |
| "E": -9.2578125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-116", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.7109375, |
| "scores": { |
| "A": -12.0390625, |
| "B": -9.7421875, |
| "C": -11.453125, |
| "D": -11.5390625, |
| "E": -11.8203125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.51171875, |
| "scores": { |
| "A": -7.28125, |
| "B": -7.4921875, |
| "C": -8.8203125, |
| "D": -6.98046875, |
| "E": -7.3046875 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.71484375, |
| "scores": { |
| "A": -8.359375, |
| "B": -6.64453125, |
| "C": -8.578125, |
| "D": -8.796875, |
| "E": -9.3125 |
| } |
| }, |
| "transfer_donor_ex_id": "commonsenseqa-validation-61", |
| "transfer_donor_gold": "A", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.375, |
| "scores": { |
| "A": -8.359375, |
| "B": -6.984375, |
| "C": -8.78125, |
| "D": -8.8671875, |
| "E": -9.4453125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-120", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.3515625, |
| "scores": { |
| "A": -12.625, |
| "B": -10.171875, |
| "C": -10.5234375, |
| "D": -11.96875, |
| "E": -12.625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.84375, |
| "scores": { |
| "A": -7.4375, |
| "B": -12.28125, |
| "C": -11.75, |
| "D": -11.984375, |
| "E": -13.75 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.234375, |
| "scores": { |
| "A": -12.015625, |
| "B": -8.328125, |
| "C": -8.09375, |
| "D": -10.21875, |
| "E": -10.296875 |
| } |
| }, |
| "transfer_donor_ex_id": "strategyqa-test-7", |
| "transfer_donor_gold": "NO", |
| "patched_self": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.5, |
| "scores": { |
| "A": -12.2734375, |
| "B": -9.34375, |
| "C": -8.84375, |
| "D": -10.6796875, |
| "E": -10.75 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-122", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.671875, |
| "scores": { |
| "A": -11.09375, |
| "B": -10.421875, |
| "C": -13.25, |
| "D": -13.296875, |
| "E": -13.5 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.328125, |
| "scores": { |
| "A": -6.90625, |
| "B": -9.234375, |
| "C": -12.421875, |
| "D": -11.0, |
| "E": -12.1796875 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.4375, |
| "scores": { |
| "A": -9.5546875, |
| "B": -9.1171875, |
| "C": -11.0, |
| "D": -11.3125, |
| "E": -12.390625 |
| } |
| }, |
| "transfer_donor_ex_id": "gsm8k-test-21", |
| "transfer_donor_gold": "3", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.328125, |
| "scores": { |
| "A": -10.2265625, |
| "B": -9.8984375, |
| "C": -12.0234375, |
| "D": -12.0859375, |
| "E": -13.40625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-123", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.9296875, |
| "scores": { |
| "A": -12.8125, |
| "B": -13.265625, |
| "C": -10.09375, |
| "D": -12.0234375, |
| "E": -12.84375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.0625, |
| "scores": { |
| "A": -7.71875, |
| "B": -10.9296875, |
| "C": -11.78125, |
| "D": -11.5546875, |
| "E": -13.9375 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.1953125, |
| "scores": { |
| "A": -11.609375, |
| "B": -11.1875, |
| "C": -10.9921875, |
| "D": -11.9765625, |
| "E": -12.8125 |
| } |
| }, |
| "transfer_donor_ex_id": "commonsenseqa-validation-12", |
| "transfer_donor_gold": "E", |
| "patched_self": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.1171875, |
| "scores": { |
| "A": -11.625, |
| "B": -11.296875, |
| "C": -11.1796875, |
| "D": -12.0078125, |
| "E": -12.8671875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-125", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.25, |
| "scores": { |
| "A": -12.8984375, |
| "B": -12.015625, |
| "C": -10.3671875, |
| "D": -10.6171875, |
| "E": -11.515625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.16796875, |
| "scores": { |
| "A": -6.90234375, |
| "B": -8.6875, |
| "C": -10.0703125, |
| "D": -9.84375, |
| "E": -10.1640625 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.31640625, |
| "scores": { |
| "A": -9.65625, |
| "B": -8.28125, |
| "C": -7.96484375, |
| "D": -9.09375, |
| "E": -9.828125 |
| } |
| }, |
| "transfer_donor_ex_id": "strategyqa-test-17", |
| "transfer_donor_gold": "YES", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.2421875, |
| "scores": { |
| "A": -9.578125, |
| "B": -8.921875, |
| "C": -9.1640625, |
| "D": -9.5703125, |
| "E": -10.484375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-130", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.5625, |
| "scores": { |
| "A": -12.53125, |
| "B": -11.625, |
| "C": -14.40625, |
| "D": -11.0625, |
| "E": -12.203125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.62890625, |
| "scores": { |
| "A": -7.70703125, |
| "B": -7.73046875, |
| "C": -10.4296875, |
| "D": -8.3359375, |
| "E": -8.21875 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.234375, |
| "scores": { |
| "A": -10.640625, |
| "B": -8.5390625, |
| "C": -9.2578125, |
| "D": -9.7734375, |
| "E": -9.6796875 |
| } |
| }, |
| "transfer_donor_ex_id": "strategyqa-test-2", |
| "transfer_donor_gold": "NO", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.875, |
| "scores": { |
| "A": -10.1953125, |
| "B": -8.4921875, |
| "C": -9.40625, |
| "D": -9.3671875, |
| "E": -9.40625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-140", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.09375, |
| "scores": { |
| "A": -12.171875, |
| "B": -10.953125, |
| "C": -12.484375, |
| "D": -12.046875, |
| "E": -12.828125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.09375, |
| "scores": { |
| "A": -8.640625, |
| "B": -9.734375, |
| "C": -10.828125, |
| "D": -10.234375, |
| "E": -11.625 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.296875, |
| "scores": { |
| "A": -9.4921875, |
| "B": -8.1953125, |
| "C": -9.5546875, |
| "D": -10.21875, |
| "E": -10.90625 |
| } |
| }, |
| "transfer_donor_ex_id": "commonsenseqa-validation-30", |
| "transfer_donor_gold": "E", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1328125, |
| "scores": { |
| "A": -9.6015625, |
| "B": -8.46875, |
| "C": -9.8515625, |
| "D": -10.34375, |
| "E": -11.0546875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-141", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.390625, |
| "scores": { |
| "A": -15.65625, |
| "B": -14.0, |
| "C": -12.3359375, |
| "D": -12.7265625, |
| "E": -13.421875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "E", |
| "correct": false, |
| "margin": -1.5078125, |
| "scores": { |
| "A": -9.03125, |
| "B": -10.1640625, |
| "C": -9.59375, |
| "D": -8.109375, |
| "E": -8.0859375 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 1.4453125, |
| "scores": { |
| "A": -13.15625, |
| "B": -10.59375, |
| "C": -8.6328125, |
| "D": -10.078125, |
| "E": -10.09375 |
| } |
| }, |
| "transfer_donor_ex_id": "strategyqa-test-3", |
| "transfer_donor_gold": "NO", |
| "patched_self": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.8984375, |
| "scores": { |
| "A": -12.7734375, |
| "B": -10.578125, |
| "C": -8.90625, |
| "D": -9.8046875, |
| "E": -9.9921875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-148", |
| "gold": "D", |
| "baseline": { |
| "pred_label": "D", |
| "correct": true, |
| "margin": 0.09375, |
| "scores": { |
| "A": -11.84375, |
| "B": -8.6875, |
| "C": -9.390625, |
| "D": -8.59375, |
| "E": -10.328125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -0.2890625, |
| "scores": { |
| "A": -8.21875, |
| "B": -9.21875, |
| "C": -9.4140625, |
| "D": -8.5078125, |
| "E": -9.5859375 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.1171875, |
| "scores": { |
| "A": -9.265625, |
| "B": -6.796875, |
| "C": -7.125, |
| "D": -7.9140625, |
| "E": -8.96875 |
| } |
| }, |
| "transfer_donor_ex_id": "gsm8k-test-58", |
| "transfer_donor_gold": "4", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -1.03125, |
| "scores": { |
| "A": -9.2578125, |
| "B": -6.734375, |
| "C": -7.015625, |
| "D": -7.765625, |
| "E": -8.8828125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-152", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.65625, |
| "scores": { |
| "A": -12.15625, |
| "B": -11.09375, |
| "C": -11.75, |
| "D": -11.765625, |
| "E": -11.75 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.6015625, |
| "scores": { |
| "A": -8.2734375, |
| "B": -12.875, |
| "C": -16.15625, |
| "D": -13.109375, |
| "E": -13.90625 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.046875, |
| "scores": { |
| "A": -9.265625, |
| "B": -7.828125, |
| "C": -8.875, |
| "D": -9.4375, |
| "E": -9.15625 |
| } |
| }, |
| "transfer_donor_ex_id": "strategyqa-test-46", |
| "transfer_donor_gold": "YES", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7734375, |
| "scores": { |
| "A": -10.3125, |
| "B": -9.5, |
| "C": -10.7109375, |
| "D": -10.359375, |
| "E": -10.2734375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-167", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 2.28125, |
| "scores": { |
| "A": -13.3125, |
| "B": -10.640625, |
| "C": -12.921875, |
| "D": -16.09375, |
| "E": -14.75 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.328125, |
| "scores": { |
| "A": -8.015625, |
| "B": -11.34375, |
| "C": -15.125, |
| "D": -13.3125, |
| "E": -14.375 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.7109375, |
| "scores": { |
| "A": -11.7578125, |
| "B": -9.7890625, |
| "C": -11.5, |
| "D": -13.203125, |
| "E": -13.0703125 |
| } |
| }, |
| "transfer_donor_ex_id": "commonsenseqa-validation-52", |
| "transfer_donor_gold": "A", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.7265625, |
| "scores": { |
| "A": -11.875, |
| "B": -10.1484375, |
| "C": -11.890625, |
| "D": -13.453125, |
| "E": -13.28125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-178", |
| "gold": "E", |
| "baseline": { |
| "pred_label": "E", |
| "correct": true, |
| "margin": 0.4609375, |
| "scores": { |
| "A": -13.1953125, |
| "B": -11.1015625, |
| "C": -12.8203125, |
| "D": -12.625, |
| "E": -10.640625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.72265625, |
| "scores": { |
| "A": -5.77734375, |
| "B": -10.375, |
| "C": -8.5, |
| "D": -10.84375, |
| "E": -12.5 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -3.421875, |
| "scores": { |
| "A": -10.203125, |
| "B": -5.3203125, |
| "C": -6.4765625, |
| "D": -9.546875, |
| "E": -8.7421875 |
| } |
| }, |
| "transfer_donor_ex_id": "gsm8k-test-8", |
| "transfer_donor_gold": "25", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -2.9375, |
| "scores": { |
| "A": -9.796875, |
| "B": -5.3828125, |
| "C": -6.3046875, |
| "D": -9.1640625, |
| "E": -8.3203125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-181", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.7734375, |
| "scores": { |
| "A": -9.6328125, |
| "B": -8.859375, |
| "C": -11.828125, |
| "D": -11.640625, |
| "E": -10.6875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.5078125, |
| "scores": { |
| "A": -7.5546875, |
| "B": -9.0625, |
| "C": -10.4453125, |
| "D": -9.140625, |
| "E": -9.0078125 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.234375, |
| "scores": { |
| "A": -9.703125, |
| "B": -8.46875, |
| "C": -9.71875, |
| "D": -10.609375, |
| "E": -9.875 |
| } |
| }, |
| "transfer_donor_ex_id": "commonsenseqa-validation-17", |
| "transfer_donor_gold": "B", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1796875, |
| "scores": { |
| "A": -9.7265625, |
| "B": -8.3984375, |
| "C": -9.578125, |
| "D": -10.4609375, |
| "E": -9.8359375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-183", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.40625, |
| "scores": { |
| "A": -12.6484375, |
| "B": -9.3671875, |
| "C": -10.7734375, |
| "D": -13.140625, |
| "E": -13.125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.71875, |
| "scores": { |
| "A": -8.6640625, |
| "B": -7.5078125, |
| "C": -6.7890625, |
| "D": -9.3046875, |
| "E": -10.7578125 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.53125, |
| "scores": { |
| "A": -10.5625, |
| "B": -6.87890625, |
| "C": -7.41015625, |
| "D": -10.3125, |
| "E": -11.34375 |
| } |
| }, |
| "transfer_donor_ex_id": "gsm8k-test-17", |
| "transfer_donor_gold": "350", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.625, |
| "scores": { |
| "A": -10.9375, |
| "B": -7.3046875, |
| "C": -7.9296875, |
| "D": -10.609375, |
| "E": -11.703125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-189", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.2109375, |
| "scores": { |
| "A": -14.5390625, |
| "B": -11.546875, |
| "C": -11.8046875, |
| "D": -11.7578125, |
| "E": -13.34375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "D", |
| "correct": false, |
| "margin": -0.84375, |
| "scores": { |
| "A": -9.875, |
| "B": -10.3828125, |
| "C": -10.28125, |
| "D": -9.5390625, |
| "E": -9.5625 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.5546875, |
| "scores": { |
| "A": -11.5625, |
| "B": -9.5625, |
| "C": -9.0078125, |
| "D": -9.9375, |
| "E": -9.8125 |
| } |
| }, |
| "transfer_donor_ex_id": "commonsenseqa-validation-24", |
| "transfer_donor_gold": "B", |
| "patched_self": { |
| "pred_label": "C", |
| "correct": false, |
| "margin": -0.7265625, |
| "scores": { |
| "A": -11.609375, |
| "B": -9.78125, |
| "C": -9.0546875, |
| "D": -9.8984375, |
| "E": -9.7109375 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-190", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.34375, |
| "scores": { |
| "A": -13.6015625, |
| "B": -10.7734375, |
| "C": -10.4296875, |
| "D": -13.3671875, |
| "E": -13.9296875 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -6.5, |
| "scores": { |
| "A": -6.3515625, |
| "B": -9.2578125, |
| "C": -12.8515625, |
| "D": -10.84375, |
| "E": -12.953125 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.28125, |
| "scores": { |
| "A": -10.484375, |
| "B": -9.234375, |
| "C": -8.953125, |
| "D": -10.9375, |
| "E": -11.5078125 |
| } |
| }, |
| "transfer_donor_ex_id": "strategyqa-test-41", |
| "transfer_donor_gold": "NO", |
| "patched_self": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.2890625, |
| "scores": { |
| "A": -10.6328125, |
| "B": -9.8515625, |
| "C": -9.5625, |
| "D": -10.96875, |
| "E": -11.5 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-191", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.953125, |
| "scores": { |
| "A": -11.890625, |
| "B": -10.9375, |
| "C": -13.640625, |
| "D": -14.109375, |
| "E": -13.765625 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.09375, |
| "scores": { |
| "A": -6.4375, |
| "B": -8.53125, |
| "C": -11.7734375, |
| "D": -11.28125, |
| "E": -12.3203125 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.13671875, |
| "scores": { |
| "A": -7.1484375, |
| "B": -6.01171875, |
| "C": -7.5625, |
| "D": -8.875, |
| "E": -9.078125 |
| } |
| }, |
| "transfer_donor_ex_id": "gsm8k-test-11", |
| "transfer_donor_gold": "10", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.0859375, |
| "scores": { |
| "A": -8.28125, |
| "B": -7.1953125, |
| "C": -8.546875, |
| "D": -9.734375, |
| "E": -9.796875 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-206", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.1796875, |
| "scores": { |
| "A": -11.6171875, |
| "B": -10.96875, |
| "C": -11.1484375, |
| "D": -12.84375, |
| "E": -14.0 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.03125, |
| "scores": { |
| "A": -8.0, |
| "B": -11.03125, |
| "C": -12.328125, |
| "D": -11.765625, |
| "E": -14.171875 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.1796875, |
| "scores": { |
| "A": -10.859375, |
| "B": -9.6796875, |
| "C": -11.0546875, |
| "D": -12.25, |
| "E": -13.625 |
| } |
| }, |
| "transfer_donor_ex_id": "strategyqa-test-42", |
| "transfer_donor_gold": "YES", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.8046875, |
| "scores": { |
| "A": -11.140625, |
| "B": -10.3359375, |
| "C": -11.6484375, |
| "D": -12.53125, |
| "E": -14.140625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-212", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.5078125, |
| "scores": { |
| "A": -11.59375, |
| "B": -9.609375, |
| "C": -11.1171875, |
| "D": -11.7421875, |
| "E": -12.8359375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -3.390625, |
| "scores": { |
| "A": -4.984375, |
| "B": -8.375, |
| "C": -11.28125, |
| "D": -9.4140625, |
| "E": -11.890625 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.83984375, |
| "scores": { |
| "A": -9.765625, |
| "B": -6.72265625, |
| "C": -8.5625, |
| "D": -9.34375, |
| "E": -9.75 |
| } |
| }, |
| "transfer_donor_ex_id": "gsm8k-test-41", |
| "transfer_donor_gold": "131250", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 1.48828125, |
| "scores": { |
| "A": -10.0, |
| "B": -7.37890625, |
| "C": -8.8671875, |
| "D": -9.53125, |
| "E": -9.8125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-223", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 3.09375, |
| "scores": { |
| "A": -10.265625, |
| "B": -7.1484375, |
| "C": -10.2421875, |
| "D": -10.921875, |
| "E": -11.3359375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -4.09765625, |
| "scores": { |
| "A": -4.67578125, |
| "B": -8.7734375, |
| "C": -12.375, |
| "D": -11.1796875, |
| "E": -13.109375 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 2.51171875, |
| "scores": { |
| "A": -10.9375, |
| "B": -5.73828125, |
| "C": -8.25, |
| "D": -10.328125, |
| "E": -10.53125 |
| } |
| }, |
| "transfer_donor_ex_id": "strategyqa-test-62", |
| "transfer_donor_gold": "YES", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 2.51171875, |
| "scores": { |
| "A": -10.4375, |
| "B": -5.91015625, |
| "C": -8.421875, |
| "D": -9.984375, |
| "E": -10.328125 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-228", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.078125, |
| "scores": { |
| "A": -13.3125, |
| "B": -10.71875, |
| "C": -10.796875, |
| "D": -14.0859375, |
| "E": -14.8984375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -1.28125, |
| "scores": { |
| "A": -7.171875, |
| "B": -8.453125, |
| "C": -9.515625, |
| "D": -10.125, |
| "E": -8.953125 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.98828125, |
| "scores": { |
| "A": -8.703125, |
| "B": -6.40234375, |
| "C": -7.390625, |
| "D": -9.640625, |
| "E": -9.0546875 |
| } |
| }, |
| "transfer_donor_ex_id": "gsm8k-test-18", |
| "transfer_donor_gold": "260", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.796875, |
| "scores": { |
| "A": -8.9296875, |
| "B": -6.5703125, |
| "C": -7.3671875, |
| "D": -9.640625, |
| "E": -9.15625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-249", |
| "gold": "C", |
| "baseline": { |
| "pred_label": "C", |
| "correct": true, |
| "margin": 0.828125, |
| "scores": { |
| "A": -10.328125, |
| "B": -8.859375, |
| "C": -8.03125, |
| "D": -11.640625, |
| "E": -11.109375 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.59375, |
| "scores": { |
| "A": -5.9921875, |
| "B": -7.609375, |
| "C": -8.5859375, |
| "D": -8.4375, |
| "E": -8.6796875 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.4375, |
| "scores": { |
| "A": -8.703125, |
| "B": -6.5078125, |
| "C": -6.9453125, |
| "D": -9.828125, |
| "E": -9.34375 |
| } |
| }, |
| "transfer_donor_ex_id": "gsm8k-test-5", |
| "transfer_donor_gold": "168", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": false, |
| "margin": -0.34375, |
| "scores": { |
| "A": -8.9921875, |
| "B": -6.984375, |
| "C": -7.328125, |
| "D": -10.1171875, |
| "E": -9.765625 |
| } |
| } |
| }, |
| { |
| "ex_id": "aqua-test-251", |
| "gold": "B", |
| "baseline": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.078125, |
| "scores": { |
| "A": -9.90625, |
| "B": -9.828125, |
| "C": -12.40625, |
| "D": -11.265625, |
| "E": -10.703125 |
| } |
| }, |
| "ablated_1": { |
| "pred_label": "A", |
| "correct": false, |
| "margin": -2.984375, |
| "scores": { |
| "A": -9.296875, |
| "B": -12.28125, |
| "C": -13.5, |
| "D": -12.09375, |
| "E": -11.140625 |
| } |
| }, |
| "patched_transfer": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.4296875, |
| "scores": { |
| "A": -8.9765625, |
| "B": -8.546875, |
| "C": -10.03125, |
| "D": -10.625, |
| "E": -9.125 |
| } |
| }, |
| "transfer_donor_ex_id": "gsm8k-test-45", |
| "transfer_donor_gold": "11", |
| "patched_self": { |
| "pred_label": "B", |
| "correct": true, |
| "margin": 0.53125, |
| "scores": { |
| "A": -9.375, |
| "B": -8.8203125, |
| "C": -10.15625, |
| "D": -10.6484375, |
| "E": -9.3515625 |
| } |
| } |
| } |
| ] |
| } |