| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "91ffb92c-ec01-47b1-9f45-773ec12047b3", |
| "datetime_epoch_millis": 1745752729855, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.0796000063419342, |
| "tpp_threshold_2_intended_diff_only": 0.10209999084472655, |
| "tpp_threshold_2_unintended_diff_only": 0.022499984502792357, |
| "tpp_threshold_5_total_metric": 0.12212500423192978, |
| "tpp_threshold_5_intended_diff_only": 0.1834999918937683, |
| "tpp_threshold_5_unintended_diff_only": 0.061374987661838534, |
| "tpp_threshold_10_total_metric": 0.13012500256299972, |
| "tpp_threshold_10_intended_diff_only": 0.2343999981880188, |
| "tpp_threshold_10_unintended_diff_only": 0.10427499562501907, |
| "tpp_threshold_20_total_metric": 0.15907501578330993, |
| "tpp_threshold_20_intended_diff_only": 0.2909000039100647, |
| "tpp_threshold_20_unintended_diff_only": 0.13182498812675475, |
| "tpp_threshold_50_total_metric": 0.18952500969171523, |
| "tpp_threshold_50_intended_diff_only": 0.3651000082492828, |
| "tpp_threshold_50_unintended_diff_only": 0.1755749985575676, |
| "tpp_threshold_100_total_metric": 0.21270000636577605, |
| "tpp_threshold_100_intended_diff_only": 0.414300000667572, |
| "tpp_threshold_100_unintended_diff_only": 0.20159999430179598, |
| "tpp_threshold_500_total_metric": 0.21215003430843354, |
| "tpp_threshold_500_intended_diff_only": 0.44340003132820127, |
| "tpp_threshold_500_unintended_diff_only": 0.23124999701976778 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.14905000925064088, |
| "tpp_threshold_2_intended_diff_only": 0.19019999504089355, |
| "tpp_threshold_2_unintended_diff_only": 0.04114998579025268, |
| "tpp_threshold_5_total_metric": 0.22050001621246337, |
| "tpp_threshold_5_intended_diff_only": 0.33440001010894777, |
| "tpp_threshold_5_unintended_diff_only": 0.11389999389648438, |
| "tpp_threshold_10_total_metric": 0.1982500046491623, |
| "tpp_threshold_10_intended_diff_only": 0.3942000031471252, |
| "tpp_threshold_10_unintended_diff_only": 0.19594999849796296, |
| "tpp_threshold_20_total_metric": 0.18345001339912415, |
| "tpp_threshold_20_intended_diff_only": 0.4300000071525574, |
| "tpp_threshold_20_unintended_diff_only": 0.24654999375343323, |
| "tpp_threshold_50_total_metric": 0.1382500022649765, |
| "tpp_threshold_50_intended_diff_only": 0.4596000075340271, |
| "tpp_threshold_50_unintended_diff_only": 0.3213500052690506, |
| "tpp_threshold_100_total_metric": 0.10435000360012055, |
| "tpp_threshold_100_intended_diff_only": 0.4662000060081482, |
| "tpp_threshold_100_unintended_diff_only": 0.36185000240802767, |
| "tpp_threshold_500_total_metric": 0.07075002789497375, |
| "tpp_threshold_500_intended_diff_only": 0.46700003147125246, |
| "tpp_threshold_500_unintended_diff_only": 0.3962500035762787 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.010150003433227538, |
| "tpp_threshold_2_intended_diff_only": 0.01399998664855957, |
| "tpp_threshold_2_unintended_diff_only": 0.0038499832153320312, |
| "tpp_threshold_5_total_metric": 0.02374999225139618, |
| "tpp_threshold_5_intended_diff_only": 0.03259997367858887, |
| "tpp_threshold_5_unintended_diff_only": 0.008849981427192687, |
| "tpp_threshold_10_total_metric": 0.06200000047683716, |
| "tpp_threshold_10_intended_diff_only": 0.07459999322891235, |
| "tpp_threshold_10_unintended_diff_only": 0.012599992752075195, |
| "tpp_threshold_20_total_metric": 0.13470001816749572, |
| "tpp_threshold_20_intended_diff_only": 0.15180000066757202, |
| "tpp_threshold_20_unintended_diff_only": 0.017099982500076293, |
| "tpp_threshold_50_total_metric": 0.24080001711845397, |
| "tpp_threshold_50_intended_diff_only": 0.27060000896453856, |
| "tpp_threshold_50_unintended_diff_only": 0.029799991846084596, |
| "tpp_threshold_100_total_metric": 0.32105000913143156, |
| "tpp_threshold_100_intended_diff_only": 0.36239999532699585, |
| "tpp_threshold_100_unintended_diff_only": 0.04134998619556427, |
| "tpp_threshold_500_total_metric": 0.35355004072189333, |
| "tpp_threshold_500_intended_diff_only": 0.41980003118515014, |
| "tpp_threshold_500_unintended_diff_only": 0.06624999046325683 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.15.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.15.hook_resid_post", |
| "hook_layer": 15, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.1482500284910202, |
| "tpp_threshold_2_intended_diff_only": 0.17500001192092896, |
| "tpp_threshold_2_unintended_diff_only": 0.026749983429908752, |
| "tpp_threshold_5_total_metric": 0.2042500376701355, |
| "tpp_threshold_5_intended_diff_only": 0.2760000228881836, |
| "tpp_threshold_5_unintended_diff_only": 0.0717499852180481, |
| "tpp_threshold_10_total_metric": 0.2175000160932541, |
| "tpp_threshold_10_intended_diff_only": 0.35199999809265137, |
| "tpp_threshold_10_unintended_diff_only": 0.13449998199939728, |
| "tpp_threshold_20_total_metric": 0.20400004088878632, |
| "tpp_threshold_20_intended_diff_only": 0.39100003242492676, |
| "tpp_threshold_20_unintended_diff_only": 0.18699999153614044, |
| "tpp_threshold_50_total_metric": 0.13699999451637268, |
| "tpp_threshold_50_intended_diff_only": 0.44099998474121094, |
| "tpp_threshold_50_unintended_diff_only": 0.30399999022483826, |
| "tpp_threshold_100_total_metric": 0.08350001275539398, |
| "tpp_threshold_100_intended_diff_only": 0.4490000009536743, |
| "tpp_threshold_100_unintended_diff_only": 0.36549998819828033, |
| "tpp_threshold_500_total_metric": 0.04975003004074097, |
| "tpp_threshold_500_intended_diff_only": 0.45100003480911255, |
| "tpp_threshold_500_unintended_diff_only": 0.4012500047683716 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.16000008583068848, |
| "tpp_threshold_2_intended_diff_only": 0.24100005626678467, |
| "tpp_threshold_2_unintended_diff_only": 0.08099997043609619, |
| "tpp_threshold_5_total_metric": 0.16925007104873657, |
| "tpp_threshold_5_intended_diff_only": 0.3340000510215759, |
| "tpp_threshold_5_unintended_diff_only": 0.16474997997283936, |
| "tpp_threshold_10_total_metric": 0.16050001978874207, |
| "tpp_threshold_10_intended_diff_only": 0.37400001287460327, |
| "tpp_threshold_10_unintended_diff_only": 0.2134999930858612, |
| "tpp_threshold_20_total_metric": 0.11800006031990051, |
| "tpp_threshold_20_intended_diff_only": 0.41300004720687866, |
| "tpp_threshold_20_unintended_diff_only": 0.29499998688697815, |
| "tpp_threshold_50_total_metric": 0.09950006008148193, |
| "tpp_threshold_50_intended_diff_only": 0.4490000605583191, |
| "tpp_threshold_50_unintended_diff_only": 0.34950000047683716, |
| "tpp_threshold_100_total_metric": 0.07350006699562073, |
| "tpp_threshold_100_intended_diff_only": 0.4580000638961792, |
| "tpp_threshold_100_unintended_diff_only": 0.38449999690055847, |
| "tpp_threshold_500_total_metric": 0.05575007200241089, |
| "tpp_threshold_500_intended_diff_only": 0.4580000638961792, |
| "tpp_threshold_500_unintended_diff_only": 0.4022499918937683 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.18474997580051422, |
| "tpp_threshold_2_intended_diff_only": 0.22999995946884155, |
| "tpp_threshold_2_unintended_diff_only": 0.04524998366832733, |
| "tpp_threshold_5_total_metric": 0.19524994492530823, |
| "tpp_threshold_5_intended_diff_only": 0.35499995946884155, |
| "tpp_threshold_5_unintended_diff_only": 0.15975001454353333, |
| "tpp_threshold_10_total_metric": 0.10349996387958527, |
| "tpp_threshold_10_intended_diff_only": 0.390999972820282, |
| "tpp_threshold_10_unintended_diff_only": 0.2875000089406967, |
| "tpp_threshold_20_total_metric": 0.1187499612569809, |
| "tpp_threshold_20_intended_diff_only": 0.4049999713897705, |
| "tpp_threshold_20_unintended_diff_only": 0.2862500101327896, |
| "tpp_threshold_50_total_metric": 0.09999997913837433, |
| "tpp_threshold_50_intended_diff_only": 0.44599997997283936, |
| "tpp_threshold_50_unintended_diff_only": 0.346000000834465, |
| "tpp_threshold_100_total_metric": 0.08699996769428253, |
| "tpp_threshold_100_intended_diff_only": 0.4559999704360962, |
| "tpp_threshold_100_unintended_diff_only": 0.36900000274181366, |
| "tpp_threshold_500_total_metric": 0.052750006318092346, |
| "tpp_threshold_500_intended_diff_only": 0.4570000171661377, |
| "tpp_threshold_500_unintended_diff_only": 0.40425001084804535 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.22825001180171967, |
| "tpp_threshold_2_intended_diff_only": 0.27799999713897705, |
| "tpp_threshold_2_unintended_diff_only": 0.049749985337257385, |
| "tpp_threshold_5_total_metric": 0.3190000057220459, |
| "tpp_threshold_5_intended_diff_only": 0.4259999990463257, |
| "tpp_threshold_5_unintended_diff_only": 0.10699999332427979, |
| "tpp_threshold_10_total_metric": 0.3150000125169754, |
| "tpp_threshold_10_intended_diff_only": 0.4660000205039978, |
| "tpp_threshold_10_unintended_diff_only": 0.1510000079870224, |
| "tpp_threshold_20_total_metric": 0.2682500183582306, |
| "tpp_threshold_20_intended_diff_only": 0.4860000014305115, |
| "tpp_threshold_20_unintended_diff_only": 0.21774998307228088, |
| "tpp_threshold_50_total_metric": 0.1889999955892563, |
| "tpp_threshold_50_intended_diff_only": 0.49000000953674316, |
| "tpp_threshold_50_unintended_diff_only": 0.3010000139474869, |
| "tpp_threshold_100_total_metric": 0.15224996209144592, |
| "tpp_threshold_100_intended_diff_only": 0.49299997091293335, |
| "tpp_threshold_100_unintended_diff_only": 0.3407500088214874, |
| "tpp_threshold_500_total_metric": 0.10700000822544098, |
| "tpp_threshold_500_intended_diff_only": 0.49400001764297485, |
| "tpp_threshold_500_unintended_diff_only": 0.3870000094175339 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.02399994432926178, |
| "tpp_threshold_2_intended_diff_only": 0.026999950408935547, |
| "tpp_threshold_2_unintended_diff_only": 0.003000006079673767, |
| "tpp_threshold_5_total_metric": 0.2147500216960907, |
| "tpp_threshold_5_intended_diff_only": 0.281000018119812, |
| "tpp_threshold_5_unintended_diff_only": 0.06624999642372131, |
| "tpp_threshold_10_total_metric": 0.19475001096725464, |
| "tpp_threshold_10_intended_diff_only": 0.3880000114440918, |
| "tpp_threshold_10_unintended_diff_only": 0.19325000047683716, |
| "tpp_threshold_20_total_metric": 0.2082499861717224, |
| "tpp_threshold_20_intended_diff_only": 0.45499998331069946, |
| "tpp_threshold_20_unintended_diff_only": 0.24674999713897705, |
| "tpp_threshold_50_total_metric": 0.16574998199939728, |
| "tpp_threshold_50_intended_diff_only": 0.47200000286102295, |
| "tpp_threshold_50_unintended_diff_only": 0.30625002086162567, |
| "tpp_threshold_100_total_metric": 0.12550000846385956, |
| "tpp_threshold_100_intended_diff_only": 0.4750000238418579, |
| "tpp_threshold_100_unintended_diff_only": 0.34950001537799835, |
| "tpp_threshold_500_total_metric": 0.0885000228881836, |
| "tpp_threshold_500_intended_diff_only": 0.4750000238418579, |
| "tpp_threshold_500_unintended_diff_only": 0.3865000009536743 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.015250012278556824, |
| "tpp_threshold_2_intended_diff_only": 0.018999993801116943, |
| "tpp_threshold_2_unintended_diff_only": 0.0037499815225601196, |
| "tpp_threshold_5_total_metric": 0.027500003576278687, |
| "tpp_threshold_5_intended_diff_only": 0.0339999794960022, |
| "tpp_threshold_5_unintended_diff_only": 0.006499975919723511, |
| "tpp_threshold_10_total_metric": 0.05450001358985901, |
| "tpp_threshold_10_intended_diff_only": 0.0690000057220459, |
| "tpp_threshold_10_unintended_diff_only": 0.01449999213218689, |
| "tpp_threshold_20_total_metric": 0.1157500147819519, |
| "tpp_threshold_20_intended_diff_only": 0.12999999523162842, |
| "tpp_threshold_20_unintended_diff_only": 0.014249980449676514, |
| "tpp_threshold_50_total_metric": 0.24775002896785736, |
| "tpp_threshold_50_intended_diff_only": 0.2670000195503235, |
| "tpp_threshold_50_unintended_diff_only": 0.019249990582466125, |
| "tpp_threshold_100_total_metric": 0.3530000299215317, |
| "tpp_threshold_100_intended_diff_only": 0.3830000162124634, |
| "tpp_threshold_100_unintended_diff_only": 0.0299999862909317, |
| "tpp_threshold_500_total_metric": 0.4112500548362732, |
| "tpp_threshold_500_intended_diff_only": 0.46000003814697266, |
| "tpp_threshold_500_unintended_diff_only": 0.04874998331069946 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.003000020980834961, |
| "tpp_threshold_2_intended_diff_only": 0.004999995231628418, |
| "tpp_threshold_2_unintended_diff_only": 0.001999974250793457, |
| "tpp_threshold_5_total_metric": 0.008749991655349731, |
| "tpp_threshold_5_intended_diff_only": 0.029999971389770508, |
| "tpp_threshold_5_unintended_diff_only": 0.021249979734420776, |
| "tpp_threshold_10_total_metric": 0.06174999475479126, |
| "tpp_threshold_10_intended_diff_only": 0.07999998331069946, |
| "tpp_threshold_10_unintended_diff_only": 0.018249988555908203, |
| "tpp_threshold_20_total_metric": 0.22225002944469452, |
| "tpp_threshold_20_intended_diff_only": 0.24900001287460327, |
| "tpp_threshold_20_unintended_diff_only": 0.026749983429908752, |
| "tpp_threshold_50_total_metric": 0.33024999499320984, |
| "tpp_threshold_50_intended_diff_only": 0.38099998235702515, |
| "tpp_threshold_50_unintended_diff_only": 0.05074998736381531, |
| "tpp_threshold_100_total_metric": 0.359749972820282, |
| "tpp_threshold_100_intended_diff_only": 0.4229999780654907, |
| "tpp_threshold_100_unintended_diff_only": 0.06325000524520874, |
| "tpp_threshold_500_total_metric": 0.3240000307559967, |
| "tpp_threshold_500_intended_diff_only": 0.42900002002716064, |
| "tpp_threshold_500_unintended_diff_only": 0.10499998927116394 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": 0.0005000084638595581, |
| "tpp_threshold_2_intended_diff_only": 0.004999995231628418, |
| "tpp_threshold_2_unintended_diff_only": 0.00449998676776886, |
| "tpp_threshold_5_total_metric": 0.010999977588653564, |
| "tpp_threshold_5_intended_diff_only": 0.01699995994567871, |
| "tpp_threshold_5_unintended_diff_only": 0.0059999823570251465, |
| "tpp_threshold_10_total_metric": 0.030000001192092896, |
| "tpp_threshold_10_intended_diff_only": 0.041999995708465576, |
| "tpp_threshold_10_unintended_diff_only": 0.01199999451637268, |
| "tpp_threshold_20_total_metric": 0.06850001215934753, |
| "tpp_threshold_20_intended_diff_only": 0.08399999141693115, |
| "tpp_threshold_20_unintended_diff_only": 0.015499979257583618, |
| "tpp_threshold_50_total_metric": 0.16575002670288086, |
| "tpp_threshold_50_intended_diff_only": 0.18800002336502075, |
| "tpp_threshold_50_unintended_diff_only": 0.022249996662139893, |
| "tpp_threshold_100_total_metric": 0.2815000116825104, |
| "tpp_threshold_100_intended_diff_only": 0.32499998807907104, |
| "tpp_threshold_100_unintended_diff_only": 0.04349997639656067, |
| "tpp_threshold_500_total_metric": 0.35875003039836884, |
| "tpp_threshold_500_intended_diff_only": 0.4280000329017639, |
| "tpp_threshold_500_unintended_diff_only": 0.06925000250339508 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.015749961137771606, |
| "tpp_threshold_2_intended_diff_only": 0.01699995994567871, |
| "tpp_threshold_2_unintended_diff_only": 0.0012499988079071045, |
| "tpp_threshold_5_total_metric": 0.026499971747398376, |
| "tpp_threshold_5_intended_diff_only": 0.030999958515167236, |
| "tpp_threshold_5_unintended_diff_only": 0.00449998676776886, |
| "tpp_threshold_10_total_metric": 0.07674996554851532, |
| "tpp_threshold_10_intended_diff_only": 0.08599996566772461, |
| "tpp_threshold_10_unintended_diff_only": 0.00925000011920929, |
| "tpp_threshold_20_total_metric": 0.10699999332427979, |
| "tpp_threshold_20_intended_diff_only": 0.12199997901916504, |
| "tpp_threshold_20_unintended_diff_only": 0.014999985694885254, |
| "tpp_threshold_50_total_metric": 0.18550002574920654, |
| "tpp_threshold_50_intended_diff_only": 0.2070000171661377, |
| "tpp_threshold_50_unintended_diff_only": 0.021499991416931152, |
| "tpp_threshold_100_total_metric": 0.29224999248981476, |
| "tpp_threshold_100_intended_diff_only": 0.32099997997283936, |
| "tpp_threshold_100_unintended_diff_only": 0.028749987483024597, |
| "tpp_threshold_500_total_metric": 0.38325004279613495, |
| "tpp_threshold_500_intended_diff_only": 0.4190000295639038, |
| "tpp_threshold_500_unintended_diff_only": 0.03574998676776886 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.016250014305114746, |
| "tpp_threshold_2_intended_diff_only": 0.02399998903274536, |
| "tpp_threshold_2_unintended_diff_only": 0.007749974727630615, |
| "tpp_threshold_5_total_metric": 0.04500001668930054, |
| "tpp_threshold_5_intended_diff_only": 0.050999999046325684, |
| "tpp_threshold_5_unintended_diff_only": 0.0059999823570251465, |
| "tpp_threshold_10_total_metric": 0.08700002729892731, |
| "tpp_threshold_10_intended_diff_only": 0.09600001573562622, |
| "tpp_threshold_10_unintended_diff_only": 0.008999988436698914, |
| "tpp_threshold_20_total_metric": 0.1600000411272049, |
| "tpp_threshold_20_intended_diff_only": 0.17400002479553223, |
| "tpp_threshold_20_unintended_diff_only": 0.013999983668327332, |
| "tpp_threshold_50_total_metric": 0.2747500091791153, |
| "tpp_threshold_50_intended_diff_only": 0.3100000023841858, |
| "tpp_threshold_50_unintended_diff_only": 0.035249993205070496, |
| "tpp_threshold_100_total_metric": 0.3187500387430191, |
| "tpp_threshold_100_intended_diff_only": 0.36000001430511475, |
| "tpp_threshold_100_unintended_diff_only": 0.04124997556209564, |
| "tpp_threshold_500_total_metric": 0.29050004482269287, |
| "tpp_threshold_500_intended_diff_only": 0.3630000352859497, |
| "tpp_threshold_500_unintended_diff_only": 0.07249999046325684 |
| } |
| } |
| } |
| } |