| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "91ffb92c-ec01-47b1-9f45-773ec12047b3", |
| "datetime_epoch_millis": 1745752684918, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.048400001227855684, |
| "tpp_threshold_2_intended_diff_only": 0.06080000400543213, |
| "tpp_threshold_2_unintended_diff_only": 0.012400002777576446, |
| "tpp_threshold_5_total_metric": 0.1058000087738037, |
| "tpp_threshold_5_intended_diff_only": 0.1442000091075897, |
| "tpp_threshold_5_unintended_diff_only": 0.03840000033378601, |
| "tpp_threshold_10_total_metric": 0.1433750107884407, |
| "tpp_threshold_10_intended_diff_only": 0.22120001316070556, |
| "tpp_threshold_10_unintended_diff_only": 0.07782500237226486, |
| "tpp_threshold_20_total_metric": 0.1572250097990036, |
| "tpp_threshold_20_intended_diff_only": 0.27350001931190493, |
| "tpp_threshold_20_unintended_diff_only": 0.11627500951290132, |
| "tpp_threshold_50_total_metric": 0.1887250140309334, |
| "tpp_threshold_50_intended_diff_only": 0.3616000235080719, |
| "tpp_threshold_50_unintended_diff_only": 0.17287500947713852, |
| "tpp_threshold_100_total_metric": 0.20962500721216204, |
| "tpp_threshold_100_intended_diff_only": 0.40600002408027647, |
| "tpp_threshold_100_unintended_diff_only": 0.19637501686811448, |
| "tpp_threshold_500_total_metric": 0.20867503732442855, |
| "tpp_threshold_500_intended_diff_only": 0.44170004725456236, |
| "tpp_threshold_500_unintended_diff_only": 0.2330250099301338 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.07679998874664307, |
| "tpp_threshold_2_intended_diff_only": 0.0997999906539917, |
| "tpp_threshold_2_unintended_diff_only": 0.023000001907348633, |
| "tpp_threshold_5_total_metric": 0.1800000101327896, |
| "tpp_threshold_5_intended_diff_only": 0.2520000100135803, |
| "tpp_threshold_5_unintended_diff_only": 0.07199999988079071, |
| "tpp_threshold_10_total_metric": 0.22305001616477965, |
| "tpp_threshold_10_intended_diff_only": 0.3720000147819519, |
| "tpp_threshold_10_unintended_diff_only": 0.14894999861717223, |
| "tpp_threshold_20_total_metric": 0.20475001335144044, |
| "tpp_threshold_20_intended_diff_only": 0.4224000215530396, |
| "tpp_threshold_20_unintended_diff_only": 0.21765000820159913, |
| "tpp_threshold_50_total_metric": 0.1442500203847885, |
| "tpp_threshold_50_intended_diff_only": 0.45700002908706666, |
| "tpp_threshold_50_unintended_diff_only": 0.31275000870227815, |
| "tpp_threshold_100_total_metric": 0.11514999866485595, |
| "tpp_threshold_100_intended_diff_only": 0.46440001726150515, |
| "tpp_threshold_100_unintended_diff_only": 0.34925001859664917, |
| "tpp_threshold_500_total_metric": 0.06950003206729889, |
| "tpp_threshold_500_intended_diff_only": 0.4670000433921814, |
| "tpp_threshold_500_unintended_diff_only": 0.3975000113248825 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.020000013709068298, |
| "tpp_threshold_2_intended_diff_only": 0.021800017356872557, |
| "tpp_threshold_2_unintended_diff_only": 0.0018000036478042602, |
| "tpp_threshold_5_total_metric": 0.03160000741481781, |
| "tpp_threshold_5_intended_diff_only": 0.036400008201599124, |
| "tpp_threshold_5_unintended_diff_only": 0.004800000786781311, |
| "tpp_threshold_10_total_metric": 0.06370000541210175, |
| "tpp_threshold_10_intended_diff_only": 0.07040001153945923, |
| "tpp_threshold_10_unintended_diff_only": 0.006700006127357483, |
| "tpp_threshold_20_total_metric": 0.10970000624656677, |
| "tpp_threshold_20_intended_diff_only": 0.12460001707077026, |
| "tpp_threshold_20_unintended_diff_only": 0.014900010824203492, |
| "tpp_threshold_50_total_metric": 0.23320000767707824, |
| "tpp_threshold_50_intended_diff_only": 0.26620001792907716, |
| "tpp_threshold_50_unintended_diff_only": 0.033000010251998904, |
| "tpp_threshold_100_total_metric": 0.3041000157594681, |
| "tpp_threshold_100_intended_diff_only": 0.34760003089904784, |
| "tpp_threshold_100_unintended_diff_only": 0.043500015139579774, |
| "tpp_threshold_500_total_metric": 0.3478500425815582, |
| "tpp_threshold_500_intended_diff_only": 0.41640005111694334, |
| "tpp_threshold_500_unintended_diff_only": 0.06855000853538513 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.14.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.14.hook_resid_post", |
| "hook_layer": 14, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.06824998557567596, |
| "tpp_threshold_2_intended_diff_only": 0.09200000762939453, |
| "tpp_threshold_2_unintended_diff_only": 0.023750022053718567, |
| "tpp_threshold_5_total_metric": 0.09974999725818634, |
| "tpp_threshold_5_intended_diff_only": 0.1290000081062317, |
| "tpp_threshold_5_unintended_diff_only": 0.02925001084804535, |
| "tpp_threshold_10_total_metric": 0.2435000091791153, |
| "tpp_threshold_10_intended_diff_only": 0.34700000286102295, |
| "tpp_threshold_10_unintended_diff_only": 0.10349999368190765, |
| "tpp_threshold_20_total_metric": 0.20500002801418304, |
| "tpp_threshold_20_intended_diff_only": 0.4050000309944153, |
| "tpp_threshold_20_unintended_diff_only": 0.20000000298023224, |
| "tpp_threshold_50_total_metric": 0.15525001287460327, |
| "tpp_threshold_50_intended_diff_only": 0.44700002670288086, |
| "tpp_threshold_50_unintended_diff_only": 0.2917500138282776, |
| "tpp_threshold_100_total_metric": 0.12324999272823334, |
| "tpp_threshold_100_intended_diff_only": 0.45600003004074097, |
| "tpp_threshold_100_unintended_diff_only": 0.33275003731250763, |
| "tpp_threshold_500_total_metric": 0.05775001645088196, |
| "tpp_threshold_500_intended_diff_only": 0.45600003004074097, |
| "tpp_threshold_500_unintended_diff_only": 0.398250013589859 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.1782499998807907, |
| "tpp_threshold_2_intended_diff_only": 0.23600000143051147, |
| "tpp_threshold_2_unintended_diff_only": 0.057750001549720764, |
| "tpp_threshold_5_total_metric": 0.18949998915195465, |
| "tpp_threshold_5_intended_diff_only": 0.2879999876022339, |
| "tpp_threshold_5_unintended_diff_only": 0.09849999845027924, |
| "tpp_threshold_10_total_metric": 0.20524999499320984, |
| "tpp_threshold_10_intended_diff_only": 0.33799999952316284, |
| "tpp_threshold_10_unintended_diff_only": 0.132750004529953, |
| "tpp_threshold_20_total_metric": 0.19575002789497375, |
| "tpp_threshold_20_intended_diff_only": 0.3960000276565552, |
| "tpp_threshold_20_unintended_diff_only": 0.20024999976158142, |
| "tpp_threshold_50_total_metric": 0.12450000643730164, |
| "tpp_threshold_50_intended_diff_only": 0.4350000023841858, |
| "tpp_threshold_50_unintended_diff_only": 0.31049999594688416, |
| "tpp_threshold_100_total_metric": 0.09799997508525848, |
| "tpp_threshold_100_intended_diff_only": 0.45399999618530273, |
| "tpp_threshold_100_unintended_diff_only": 0.35600002110004425, |
| "tpp_threshold_500_total_metric": 0.05950002372264862, |
| "tpp_threshold_500_intended_diff_only": 0.46000003814697266, |
| "tpp_threshold_500_unintended_diff_only": 0.40050001442432404 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.01925000548362732, |
| "tpp_threshold_2_intended_diff_only": 0.02399998903274536, |
| "tpp_threshold_2_unintended_diff_only": 0.004749983549118042, |
| "tpp_threshold_5_total_metric": 0.2200000286102295, |
| "tpp_threshold_5_intended_diff_only": 0.2900000214576721, |
| "tpp_threshold_5_unintended_diff_only": 0.06999999284744263, |
| "tpp_threshold_10_total_metric": 0.17750003933906555, |
| "tpp_threshold_10_intended_diff_only": 0.34400004148483276, |
| "tpp_threshold_10_unintended_diff_only": 0.1665000021457672, |
| "tpp_threshold_20_total_metric": 0.17200003564357758, |
| "tpp_threshold_20_intended_diff_only": 0.3810000419616699, |
| "tpp_threshold_20_unintended_diff_only": 0.20900000631809235, |
| "tpp_threshold_50_total_metric": 0.130000039935112, |
| "tpp_threshold_50_intended_diff_only": 0.44600003957748413, |
| "tpp_threshold_50_unintended_diff_only": 0.31599999964237213, |
| "tpp_threshold_100_total_metric": 0.09649999439716339, |
| "tpp_threshold_100_intended_diff_only": 0.453000009059906, |
| "tpp_threshold_100_unintended_diff_only": 0.3565000146627426, |
| "tpp_threshold_500_total_metric": 0.04675005376338959, |
| "tpp_threshold_500_intended_diff_only": 0.4540000557899475, |
| "tpp_threshold_500_unintended_diff_only": 0.4072500020265579 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.008499979972839355, |
| "tpp_threshold_2_intended_diff_only": 0.0059999823570251465, |
| "tpp_threshold_2_unintended_diff_only": -0.002499997615814209, |
| "tpp_threshold_5_total_metric": 0.18775001168251038, |
| "tpp_threshold_5_intended_diff_only": 0.22200000286102295, |
| "tpp_threshold_5_unintended_diff_only": 0.03424999117851257, |
| "tpp_threshold_10_total_metric": 0.3137500286102295, |
| "tpp_threshold_10_intended_diff_only": 0.43800002336502075, |
| "tpp_threshold_10_unintended_diff_only": 0.12424999475479126, |
| "tpp_threshold_20_total_metric": 0.23750001192092896, |
| "tpp_threshold_20_intended_diff_only": 0.4750000238418579, |
| "tpp_threshold_20_unintended_diff_only": 0.23750001192092896, |
| "tpp_threshold_50_total_metric": 0.16175003349781036, |
| "tpp_threshold_50_intended_diff_only": 0.4830000400543213, |
| "tpp_threshold_50_unintended_diff_only": 0.3212500065565109, |
| "tpp_threshold_100_total_metric": 0.1432500034570694, |
| "tpp_threshold_100_intended_diff_only": 0.48500001430511475, |
| "tpp_threshold_100_unintended_diff_only": 0.34175001084804535, |
| "tpp_threshold_500_total_metric": 0.1040000468492508, |
| "tpp_threshold_500_intended_diff_only": 0.49100005626678467, |
| "tpp_threshold_500_unintended_diff_only": 0.3870000094175339 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.10974997282028198, |
| "tpp_threshold_2_intended_diff_only": 0.14099997282028198, |
| "tpp_threshold_2_unintended_diff_only": 0.03125, |
| "tpp_threshold_5_total_metric": 0.2030000239610672, |
| "tpp_threshold_5_intended_diff_only": 0.33100003004074097, |
| "tpp_threshold_5_unintended_diff_only": 0.12800000607967377, |
| "tpp_threshold_10_total_metric": 0.17525000870227814, |
| "tpp_threshold_10_intended_diff_only": 0.3930000066757202, |
| "tpp_threshold_10_unintended_diff_only": 0.21774999797344208, |
| "tpp_threshold_20_total_metric": 0.21349996328353882, |
| "tpp_threshold_20_intended_diff_only": 0.45499998331069946, |
| "tpp_threshold_20_unintended_diff_only": 0.24150002002716064, |
| "tpp_threshold_50_total_metric": 0.1497500091791153, |
| "tpp_threshold_50_intended_diff_only": 0.4740000367164612, |
| "tpp_threshold_50_unintended_diff_only": 0.3242500275373459, |
| "tpp_threshold_100_total_metric": 0.11475002765655518, |
| "tpp_threshold_100_intended_diff_only": 0.4740000367164612, |
| "tpp_threshold_100_unintended_diff_only": 0.359250009059906, |
| "tpp_threshold_500_total_metric": 0.07950001955032349, |
| "tpp_threshold_500_intended_diff_only": 0.4740000367164612, |
| "tpp_threshold_500_unintended_diff_only": 0.3945000171661377 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.006999984383583069, |
| "tpp_threshold_2_intended_diff_only": 0.009999990463256836, |
| "tpp_threshold_2_unintended_diff_only": 0.003000006079673767, |
| "tpp_threshold_5_total_metric": 0.003999948501586914, |
| "tpp_threshold_5_intended_diff_only": 0.007999956607818604, |
| "tpp_threshold_5_unintended_diff_only": 0.0040000081062316895, |
| "tpp_threshold_10_total_metric": 0.018499940633773804, |
| "tpp_threshold_10_intended_diff_only": 0.02599996328353882, |
| "tpp_threshold_10_unintended_diff_only": 0.007500022649765015, |
| "tpp_threshold_20_total_metric": 0.040249958634376526, |
| "tpp_threshold_20_intended_diff_only": 0.05299997329711914, |
| "tpp_threshold_20_unintended_diff_only": 0.012750014662742615, |
| "tpp_threshold_50_total_metric": 0.12424996495246887, |
| "tpp_threshold_50_intended_diff_only": 0.1589999794960022, |
| "tpp_threshold_50_unintended_diff_only": 0.034750014543533325, |
| "tpp_threshold_100_total_metric": 0.23399995267391205, |
| "tpp_threshold_100_intended_diff_only": 0.2789999842643738, |
| "tpp_threshold_100_unintended_diff_only": 0.04500003159046173, |
| "tpp_threshold_500_total_metric": 0.3877499848604202, |
| "tpp_threshold_500_intended_diff_only": 0.4480000138282776, |
| "tpp_threshold_500_unintended_diff_only": 0.06025002896785736 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.03375004231929779, |
| "tpp_threshold_2_intended_diff_only": 0.03400003910064697, |
| "tpp_threshold_2_unintended_diff_only": 0.00024999678134918213, |
| "tpp_threshold_5_total_metric": 0.04600006341934204, |
| "tpp_threshold_5_intended_diff_only": 0.061000049114227295, |
| "tpp_threshold_5_unintended_diff_only": 0.014999985694885254, |
| "tpp_threshold_10_total_metric": 0.09799998998641968, |
| "tpp_threshold_10_intended_diff_only": 0.11100000143051147, |
| "tpp_threshold_10_unintended_diff_only": 0.013000011444091797, |
| "tpp_threshold_20_total_metric": 0.15725001692771912, |
| "tpp_threshold_20_intended_diff_only": 0.18800002336502075, |
| "tpp_threshold_20_unintended_diff_only": 0.030750006437301636, |
| "tpp_threshold_50_total_metric": 0.335750013589859, |
| "tpp_threshold_50_intended_diff_only": 0.4140000343322754, |
| "tpp_threshold_50_unintended_diff_only": 0.07825002074241638, |
| "tpp_threshold_100_total_metric": 0.3370000123977661, |
| "tpp_threshold_100_intended_diff_only": 0.42900002002716064, |
| "tpp_threshold_100_unintended_diff_only": 0.09200000762939453, |
| "tpp_threshold_500_total_metric": 0.29575005173683167, |
| "tpp_threshold_500_intended_diff_only": 0.43000006675720215, |
| "tpp_threshold_500_unintended_diff_only": 0.13425001502037048 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": -0.0007499754428863525, |
| "tpp_threshold_2_intended_diff_only": 0.0020000338554382324, |
| "tpp_threshold_2_unintended_diff_only": 0.002750009298324585, |
| "tpp_threshold_5_total_metric": -0.005499973893165588, |
| "tpp_threshold_5_intended_diff_only": -0.0059999823570251465, |
| "tpp_threshold_5_unintended_diff_only": -0.0005000084638595581, |
| "tpp_threshold_10_total_metric": 0.015750035643577576, |
| "tpp_threshold_10_intended_diff_only": 0.016000032424926758, |
| "tpp_threshold_10_unintended_diff_only": 0.00024999678134918213, |
| "tpp_threshold_20_total_metric": 0.03975002467632294, |
| "tpp_threshold_20_intended_diff_only": 0.04700005054473877, |
| "tpp_threshold_20_unintended_diff_only": 0.0072500258684158325, |
| "tpp_threshold_50_total_metric": 0.1720000058412552, |
| "tpp_threshold_50_intended_diff_only": 0.18400001525878906, |
| "tpp_threshold_50_unintended_diff_only": 0.012000009417533875, |
| "tpp_threshold_100_total_metric": 0.2835000455379486, |
| "tpp_threshold_100_intended_diff_only": 0.3060000538825989, |
| "tpp_threshold_100_unintended_diff_only": 0.02250000834465027, |
| "tpp_threshold_500_total_metric": 0.3760000616312027, |
| "tpp_threshold_500_intended_diff_only": 0.42100006341934204, |
| "tpp_threshold_500_unintended_diff_only": 0.04500000178813934 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.018250003457069397, |
| "tpp_threshold_2_intended_diff_only": 0.018000006675720215, |
| "tpp_threshold_2_unintended_diff_only": -0.00024999678134918213, |
| "tpp_threshold_5_total_metric": 0.03275001049041748, |
| "tpp_threshold_5_intended_diff_only": 0.03600001335144043, |
| "tpp_threshold_5_unintended_diff_only": 0.0032500028610229492, |
| "tpp_threshold_10_total_metric": 0.04950004816055298, |
| "tpp_threshold_10_intended_diff_only": 0.057000041007995605, |
| "tpp_threshold_10_unintended_diff_only": 0.007499992847442627, |
| "tpp_threshold_20_total_metric": 0.1352500319480896, |
| "tpp_threshold_20_intended_diff_only": 0.15000003576278687, |
| "tpp_threshold_20_unintended_diff_only": 0.014750003814697266, |
| "tpp_threshold_50_total_metric": 0.22475004196166992, |
| "tpp_threshold_50_intended_diff_only": 0.2420000433921814, |
| "tpp_threshold_50_unintended_diff_only": 0.017250001430511475, |
| "tpp_threshold_100_total_metric": 0.34150004386901855, |
| "tpp_threshold_100_intended_diff_only": 0.362000048160553, |
| "tpp_threshold_100_unintended_diff_only": 0.020500004291534424, |
| "tpp_threshold_500_total_metric": 0.3692500740289688, |
| "tpp_threshold_500_intended_diff_only": 0.4160000681877136, |
| "tpp_threshold_500_unintended_diff_only": 0.04674999415874481 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.04175001382827759, |
| "tpp_threshold_2_intended_diff_only": 0.04500001668930054, |
| "tpp_threshold_2_unintended_diff_only": 0.0032500028610229492, |
| "tpp_threshold_5_total_metric": 0.0807499885559082, |
| "tpp_threshold_5_intended_diff_only": 0.08300000429153442, |
| "tpp_threshold_5_unintended_diff_only": 0.0022500157356262207, |
| "tpp_threshold_10_total_metric": 0.1367500126361847, |
| "tpp_threshold_10_intended_diff_only": 0.1420000195503235, |
| "tpp_threshold_10_unintended_diff_only": 0.005250006914138794, |
| "tpp_threshold_20_total_metric": 0.17599999904632568, |
| "tpp_threshold_20_intended_diff_only": 0.1850000023841858, |
| "tpp_threshold_20_unintended_diff_only": 0.009000003337860107, |
| "tpp_threshold_50_total_metric": 0.30925001204013824, |
| "tpp_threshold_50_intended_diff_only": 0.3320000171661377, |
| "tpp_threshold_50_unintended_diff_only": 0.02275000512599945, |
| "tpp_threshold_100_total_metric": 0.32450002431869507, |
| "tpp_threshold_100_intended_diff_only": 0.362000048160553, |
| "tpp_threshold_100_unintended_diff_only": 0.03750002384185791, |
| "tpp_threshold_500_total_metric": 0.31050004065036774, |
| "tpp_threshold_500_intended_diff_only": 0.3670000433921814, |
| "tpp_threshold_500_unintended_diff_only": 0.05650000274181366 |
| } |
| } |
| } |
| } |