| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "65b37170-42a3-4f63-bfdc-b4728a912b3a", |
| "datetime_epoch_millis": 1745619438226, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.08117500543594361, |
| "tpp_threshold_2_intended_diff_only": 0.09920001029968262, |
| "tpp_threshold_2_unintended_diff_only": 0.018025004863739015, |
| "tpp_threshold_5_total_metric": 0.14222501069307328, |
| "tpp_threshold_5_intended_diff_only": 0.17980000972747803, |
| "tpp_threshold_5_unintended_diff_only": 0.037574999034404755, |
| "tpp_threshold_10_total_metric": 0.1682000070810318, |
| "tpp_threshold_10_intended_diff_only": 0.24000000953674316, |
| "tpp_threshold_10_unintended_diff_only": 0.07180000245571136, |
| "tpp_threshold_20_total_metric": 0.18482501655817032, |
| "tpp_threshold_20_intended_diff_only": 0.2980000197887421, |
| "tpp_threshold_20_unintended_diff_only": 0.11317500323057174, |
| "tpp_threshold_50_total_metric": 0.22115001976490023, |
| "tpp_threshold_50_intended_diff_only": 0.3771000266075134, |
| "tpp_threshold_50_unintended_diff_only": 0.15595000684261323, |
| "tpp_threshold_100_total_metric": 0.2330000028014183, |
| "tpp_threshold_100_intended_diff_only": 0.4121000111103058, |
| "tpp_threshold_100_unintended_diff_only": 0.1791000083088875, |
| "tpp_threshold_500_total_metric": 0.22052503228187562, |
| "tpp_threshold_500_intended_diff_only": 0.44700004458427434, |
| "tpp_threshold_500_unintended_diff_only": 0.2264750123023987 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.1408000111579895, |
| "tpp_threshold_2_intended_diff_only": 0.17180001735687256, |
| "tpp_threshold_2_unintended_diff_only": 0.031000006198883056, |
| "tpp_threshold_5_total_metric": 0.23640001714229583, |
| "tpp_threshold_5_intended_diff_only": 0.305400013923645, |
| "tpp_threshold_5_unintended_diff_only": 0.06899999678134919, |
| "tpp_threshold_10_total_metric": 0.24945000410079957, |
| "tpp_threshold_10_intended_diff_only": 0.3842000126838684, |
| "tpp_threshold_10_unintended_diff_only": 0.13475000858306885, |
| "tpp_threshold_20_total_metric": 0.22425001859664917, |
| "tpp_threshold_20_intended_diff_only": 0.4284000277519226, |
| "tpp_threshold_20_unintended_diff_only": 0.20415000915527343, |
| "tpp_threshold_50_total_metric": 0.17915000915527343, |
| "tpp_threshold_50_intended_diff_only": 0.459000027179718, |
| "tpp_threshold_50_unintended_diff_only": 0.2798500180244446, |
| "tpp_threshold_100_total_metric": 0.14975000023841858, |
| "tpp_threshold_100_intended_diff_only": 0.46560001373291016, |
| "tpp_threshold_100_unintended_diff_only": 0.3158500134944916, |
| "tpp_threshold_500_total_metric": 0.08950002193450927, |
| "tpp_threshold_500_intended_diff_only": 0.46680004596710206, |
| "tpp_threshold_500_unintended_diff_only": 0.3773000240325928 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.021549999713897705, |
| "tpp_threshold_2_intended_diff_only": 0.026600003242492676, |
| "tpp_threshold_2_unintended_diff_only": 0.005050003528594971, |
| "tpp_threshold_5_total_metric": 0.04805000424385071, |
| "tpp_threshold_5_intended_diff_only": 0.05420000553131103, |
| "tpp_threshold_5_unintended_diff_only": 0.006150001287460327, |
| "tpp_threshold_10_total_metric": 0.08695001006126404, |
| "tpp_threshold_10_intended_diff_only": 0.09580000638961791, |
| "tpp_threshold_10_unintended_diff_only": 0.008849996328353881, |
| "tpp_threshold_20_total_metric": 0.14540001451969148, |
| "tpp_threshold_20_intended_diff_only": 0.16760001182556153, |
| "tpp_threshold_20_unintended_diff_only": 0.022199997305870058, |
| "tpp_threshold_50_total_metric": 0.263150030374527, |
| "tpp_threshold_50_intended_diff_only": 0.2952000260353088, |
| "tpp_threshold_50_unintended_diff_only": 0.03204999566078186, |
| "tpp_threshold_100_total_metric": 0.31625000536441805, |
| "tpp_threshold_100_intended_diff_only": 0.3586000084877014, |
| "tpp_threshold_100_unintended_diff_only": 0.04235000312328339, |
| "tpp_threshold_500_total_metric": 0.35155004262924194, |
| "tpp_threshold_500_intended_diff_only": 0.42720004320144656, |
| "tpp_threshold_500_unintended_diff_only": 0.07565000057220458 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.17.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-snap-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.17.hook_resid_post", |
| "hook_layer": 17, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.21550005674362183, |
| "tpp_threshold_2_intended_diff_only": 0.2640000581741333, |
| "tpp_threshold_2_unintended_diff_only": 0.048500001430511475, |
| "tpp_threshold_5_total_metric": 0.23600007593631744, |
| "tpp_threshold_5_intended_diff_only": 0.3190000653266907, |
| "tpp_threshold_5_unintended_diff_only": 0.08299998939037323, |
| "tpp_threshold_10_total_metric": 0.22100003063678741, |
| "tpp_threshold_10_intended_diff_only": 0.3540000319480896, |
| "tpp_threshold_10_unintended_diff_only": 0.13300000131130219, |
| "tpp_threshold_20_total_metric": 0.19575007259845734, |
| "tpp_threshold_20_intended_diff_only": 0.4030000567436218, |
| "tpp_threshold_20_unintended_diff_only": 0.2072499841451645, |
| "tpp_threshold_50_total_metric": 0.1637500524520874, |
| "tpp_threshold_50_intended_diff_only": 0.43400007486343384, |
| "tpp_threshold_50_unintended_diff_only": 0.27025002241134644, |
| "tpp_threshold_100_total_metric": 0.13875003159046173, |
| "tpp_threshold_100_intended_diff_only": 0.44600003957748413, |
| "tpp_threshold_100_unintended_diff_only": 0.3072500079870224, |
| "tpp_threshold_500_total_metric": 0.06450006365776062, |
| "tpp_threshold_500_intended_diff_only": 0.44800007343292236, |
| "tpp_threshold_500_unintended_diff_only": 0.38350000977516174 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.18425001204013824, |
| "tpp_threshold_2_intended_diff_only": 0.24900001287460327, |
| "tpp_threshold_2_unintended_diff_only": 0.06475000083446503, |
| "tpp_threshold_5_total_metric": 0.22200007736682892, |
| "tpp_threshold_5_intended_diff_only": 0.28700006008148193, |
| "tpp_threshold_5_unintended_diff_only": 0.06499998271465302, |
| "tpp_threshold_10_total_metric": 0.19175003468990326, |
| "tpp_threshold_10_intended_diff_only": 0.3540000319480896, |
| "tpp_threshold_10_unintended_diff_only": 0.16224999725818634, |
| "tpp_threshold_20_total_metric": 0.20000006258487701, |
| "tpp_threshold_20_intended_diff_only": 0.40800005197525024, |
| "tpp_threshold_20_unintended_diff_only": 0.20799998939037323, |
| "tpp_threshold_50_total_metric": 0.1250000298023224, |
| "tpp_threshold_50_intended_diff_only": 0.4500000476837158, |
| "tpp_threshold_50_unintended_diff_only": 0.32500001788139343, |
| "tpp_threshold_100_total_metric": 0.09750001132488251, |
| "tpp_threshold_100_intended_diff_only": 0.45600003004074097, |
| "tpp_threshold_100_unintended_diff_only": 0.35850001871585846, |
| "tpp_threshold_500_total_metric": 0.06275005638599396, |
| "tpp_threshold_500_intended_diff_only": 0.4580000638961792, |
| "tpp_threshold_500_unintended_diff_only": 0.39525000751018524 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.138000026345253, |
| "tpp_threshold_2_intended_diff_only": 0.1690000295639038, |
| "tpp_threshold_2_unintended_diff_only": 0.031000003218650818, |
| "tpp_threshold_5_total_metric": 0.2239999920129776, |
| "tpp_threshold_5_intended_diff_only": 0.33399999141693115, |
| "tpp_threshold_5_unintended_diff_only": 0.10999999940395355, |
| "tpp_threshold_10_total_metric": 0.22874999046325684, |
| "tpp_threshold_10_intended_diff_only": 0.3889999985694885, |
| "tpp_threshold_10_unintended_diff_only": 0.1602500081062317, |
| "tpp_threshold_20_total_metric": 0.20675000548362732, |
| "tpp_threshold_20_intended_diff_only": 0.4190000295639038, |
| "tpp_threshold_20_unintended_diff_only": 0.2122500240802765, |
| "tpp_threshold_50_total_metric": 0.1694999784231186, |
| "tpp_threshold_50_intended_diff_only": 0.4440000057220459, |
| "tpp_threshold_50_unintended_diff_only": 0.2745000272989273, |
| "tpp_threshold_100_total_metric": 0.12824997305870056, |
| "tpp_threshold_100_intended_diff_only": 0.45399999618530273, |
| "tpp_threshold_100_unintended_diff_only": 0.3257500231266022, |
| "tpp_threshold_500_total_metric": 0.06000000238418579, |
| "tpp_threshold_500_intended_diff_only": 0.45500004291534424, |
| "tpp_threshold_500_unintended_diff_only": 0.39500004053115845 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.14649999141693115, |
| "tpp_threshold_2_intended_diff_only": 0.15700000524520874, |
| "tpp_threshold_2_unintended_diff_only": 0.010500013828277588, |
| "tpp_threshold_5_total_metric": 0.3764999657869339, |
| "tpp_threshold_5_intended_diff_only": 0.4509999752044678, |
| "tpp_threshold_5_unintended_diff_only": 0.07450000941753387, |
| "tpp_threshold_10_total_metric": 0.3559999614953995, |
| "tpp_threshold_10_intended_diff_only": 0.4829999804496765, |
| "tpp_threshold_10_unintended_diff_only": 0.12700001895427704, |
| "tpp_threshold_20_total_metric": 0.3149999678134918, |
| "tpp_threshold_20_intended_diff_only": 0.4909999966621399, |
| "tpp_threshold_20_unintended_diff_only": 0.17600002884864807, |
| "tpp_threshold_50_total_metric": 0.2502500116825104, |
| "tpp_threshold_50_intended_diff_only": 0.49400001764297485, |
| "tpp_threshold_50_unintended_diff_only": 0.24375000596046448, |
| "tpp_threshold_100_total_metric": 0.2110000103712082, |
| "tpp_threshold_100_intended_diff_only": 0.49400001764297485, |
| "tpp_threshold_100_unintended_diff_only": 0.28300000727176666, |
| "tpp_threshold_500_total_metric": 0.14274998009204865, |
| "tpp_threshold_500_intended_diff_only": 0.49400001764297485, |
| "tpp_threshold_500_unintended_diff_only": 0.3512500375509262 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.019749969244003296, |
| "tpp_threshold_2_intended_diff_only": 0.019999980926513672, |
| "tpp_threshold_2_unintended_diff_only": 0.000250011682510376, |
| "tpp_threshold_5_total_metric": 0.12349997460842133, |
| "tpp_threshold_5_intended_diff_only": 0.13599997758865356, |
| "tpp_threshold_5_unintended_diff_only": 0.012500002980232239, |
| "tpp_threshold_10_total_metric": 0.24975000321865082, |
| "tpp_threshold_10_intended_diff_only": 0.3410000205039978, |
| "tpp_threshold_10_unintended_diff_only": 0.09125001728534698, |
| "tpp_threshold_20_total_metric": 0.20374998450279236, |
| "tpp_threshold_20_intended_diff_only": 0.42100000381469727, |
| "tpp_threshold_20_unintended_diff_only": 0.2172500193119049, |
| "tpp_threshold_50_total_metric": 0.18724997341632843, |
| "tpp_threshold_50_intended_diff_only": 0.4729999899864197, |
| "tpp_threshold_50_unintended_diff_only": 0.28575001657009125, |
| "tpp_threshold_100_total_metric": 0.1732499748468399, |
| "tpp_threshold_100_intended_diff_only": 0.4779999852180481, |
| "tpp_threshold_100_unintended_diff_only": 0.3047500103712082, |
| "tpp_threshold_500_total_metric": 0.11750000715255737, |
| "tpp_threshold_500_intended_diff_only": 0.4790000319480896, |
| "tpp_threshold_500_unintended_diff_only": 0.3615000247955322 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.003500029444694519, |
| "tpp_threshold_2_intended_diff_only": 0.008000016212463379, |
| "tpp_threshold_2_unintended_diff_only": 0.00449998676776886, |
| "tpp_threshold_5_total_metric": 0.01250004768371582, |
| "tpp_threshold_5_intended_diff_only": 0.01900005340576172, |
| "tpp_threshold_5_unintended_diff_only": 0.0065000057220458984, |
| "tpp_threshold_10_total_metric": 0.021250024437904358, |
| "tpp_threshold_10_intended_diff_only": 0.027000010013580322, |
| "tpp_threshold_10_unintended_diff_only": 0.005749985575675964, |
| "tpp_threshold_20_total_metric": 0.08750005066394806, |
| "tpp_threshold_20_intended_diff_only": 0.1170000433921814, |
| "tpp_threshold_20_unintended_diff_only": 0.029499992728233337, |
| "tpp_threshold_50_total_metric": 0.24875007569789886, |
| "tpp_threshold_50_intended_diff_only": 0.2820000648498535, |
| "tpp_threshold_50_unintended_diff_only": 0.03324998915195465, |
| "tpp_threshold_100_total_metric": 0.32975004613399506, |
| "tpp_threshold_100_intended_diff_only": 0.3760000467300415, |
| "tpp_threshold_100_unintended_diff_only": 0.04625000059604645, |
| "tpp_threshold_500_total_metric": 0.39975006878376007, |
| "tpp_threshold_500_intended_diff_only": 0.4530000686645508, |
| "tpp_threshold_500_unintended_diff_only": 0.05324999988079071 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.04174995422363281, |
| "tpp_threshold_2_intended_diff_only": 0.04999995231628418, |
| "tpp_threshold_2_unintended_diff_only": 0.008249998092651367, |
| "tpp_threshold_5_total_metric": 0.07874998450279236, |
| "tpp_threshold_5_intended_diff_only": 0.08399999141693115, |
| "tpp_threshold_5_unintended_diff_only": 0.005250006914138794, |
| "tpp_threshold_10_total_metric": 0.12474998831748962, |
| "tpp_threshold_10_intended_diff_only": 0.13499999046325684, |
| "tpp_threshold_10_unintended_diff_only": 0.010250002145767212, |
| "tpp_threshold_20_total_metric": 0.19350001215934753, |
| "tpp_threshold_20_intended_diff_only": 0.23500001430511475, |
| "tpp_threshold_20_unintended_diff_only": 0.04150000214576721, |
| "tpp_threshold_50_total_metric": 0.3062500059604645, |
| "tpp_threshold_50_intended_diff_only": 0.37400001287460327, |
| "tpp_threshold_50_unintended_diff_only": 0.0677500069141388, |
| "tpp_threshold_100_total_metric": 0.35124996304512024, |
| "tpp_threshold_100_intended_diff_only": 0.4229999780654907, |
| "tpp_threshold_100_unintended_diff_only": 0.07175001502037048, |
| "tpp_threshold_500_total_metric": 0.3460000306367874, |
| "tpp_threshold_500_intended_diff_only": 0.43800002336502075, |
| "tpp_threshold_500_unintended_diff_only": 0.09199999272823334 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": -0.001749977469444275, |
| "tpp_threshold_2_intended_diff_only": -0.001999974250793457, |
| "tpp_threshold_2_unintended_diff_only": -0.00024999678134918213, |
| "tpp_threshold_5_total_metric": -0.008999958634376526, |
| "tpp_threshold_5_intended_diff_only": -0.0029999613761901855, |
| "tpp_threshold_5_unintended_diff_only": 0.00599999725818634, |
| "tpp_threshold_10_total_metric": -0.003749936819076538, |
| "tpp_threshold_10_intended_diff_only": 0.001000046730041504, |
| "tpp_threshold_10_unintended_diff_only": 0.004749983549118042, |
| "tpp_threshold_20_total_metric": 0.014500007033348083, |
| "tpp_threshold_20_intended_diff_only": 0.018999993801116943, |
| "tpp_threshold_20_unintended_diff_only": 0.00449998676776886, |
| "tpp_threshold_50_total_metric": 0.07850006222724915, |
| "tpp_threshold_50_intended_diff_only": 0.08900004625320435, |
| "tpp_threshold_50_unintended_diff_only": 0.0104999840259552, |
| "tpp_threshold_100_total_metric": 0.18375001847743988, |
| "tpp_threshold_100_intended_diff_only": 0.203000009059906, |
| "tpp_threshold_100_unintended_diff_only": 0.019249990582466125, |
| "tpp_threshold_500_total_metric": 0.3147500604391098, |
| "tpp_threshold_500_intended_diff_only": 0.43000006675720215, |
| "tpp_threshold_500_unintended_diff_only": 0.11525000631809235 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": -0.010500013828277588, |
| "tpp_threshold_2_intended_diff_only": -0.004999995231628418, |
| "tpp_threshold_2_unintended_diff_only": 0.00550001859664917, |
| "tpp_threshold_5_total_metric": 0.06024998426437378, |
| "tpp_threshold_5_intended_diff_only": 0.06699997186660767, |
| "tpp_threshold_5_unintended_diff_only": 0.006749987602233887, |
| "tpp_threshold_10_total_metric": 0.13624994456768036, |
| "tpp_threshold_10_intended_diff_only": 0.14699995517730713, |
| "tpp_threshold_10_unintended_diff_only": 0.01075001060962677, |
| "tpp_threshold_20_total_metric": 0.18274998664855957, |
| "tpp_threshold_20_intended_diff_only": 0.19999998807907104, |
| "tpp_threshold_20_unintended_diff_only": 0.017250001430511475, |
| "tpp_threshold_50_total_metric": 0.3580000102519989, |
| "tpp_threshold_50_intended_diff_only": 0.375, |
| "tpp_threshold_50_unintended_diff_only": 0.0169999897480011, |
| "tpp_threshold_100_total_metric": 0.3799999803304672, |
| "tpp_threshold_100_intended_diff_only": 0.4089999794960022, |
| "tpp_threshold_100_unintended_diff_only": 0.028999999165534973, |
| "tpp_threshold_500_total_metric": 0.3722500056028366, |
| "tpp_threshold_500_intended_diff_only": 0.42900002002716064, |
| "tpp_threshold_500_unintended_diff_only": 0.056750014424324036 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.07475000619888306, |
| "tpp_threshold_2_intended_diff_only": 0.0820000171661377, |
| "tpp_threshold_2_unintended_diff_only": 0.007250010967254639, |
| "tpp_threshold_5_total_metric": 0.09774996340274811, |
| "tpp_threshold_5_intended_diff_only": 0.10399997234344482, |
| "tpp_threshold_5_unintended_diff_only": 0.006250008940696716, |
| "tpp_threshold_10_total_metric": 0.1562500298023224, |
| "tpp_threshold_10_intended_diff_only": 0.1690000295639038, |
| "tpp_threshold_10_unintended_diff_only": 0.012749999761581421, |
| "tpp_threshold_20_total_metric": 0.2487500160932541, |
| "tpp_threshold_20_intended_diff_only": 0.2670000195503235, |
| "tpp_threshold_20_unintended_diff_only": 0.018250003457069397, |
| "tpp_threshold_50_total_metric": 0.3242499977350235, |
| "tpp_threshold_50_intended_diff_only": 0.35600000619888306, |
| "tpp_threshold_50_unintended_diff_only": 0.03175000846385956, |
| "tpp_threshold_100_total_metric": 0.33650001883506775, |
| "tpp_threshold_100_intended_diff_only": 0.38200002908706665, |
| "tpp_threshold_100_unintended_diff_only": 0.0455000102519989, |
| "tpp_threshold_500_total_metric": 0.3250000476837158, |
| "tpp_threshold_500_intended_diff_only": 0.38600003719329834, |
| "tpp_threshold_500_unintended_diff_only": 0.06099998950958252 |
| } |
| } |
| } |
| } |