| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "65b37170-42a3-4f63-bfdc-b4728a912b3a", |
| "datetime_epoch_millis": 1745616760801, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.024874997138977048, |
| "tpp_threshold_2_intended_diff_only": 0.033899998664855956, |
| "tpp_threshold_2_unintended_diff_only": 0.009025001525878906, |
| "tpp_threshold_5_total_metric": 0.13442501723766326, |
| "tpp_threshold_5_intended_diff_only": 0.1695000171661377, |
| "tpp_threshold_5_unintended_diff_only": 0.035074999928474425, |
| "tpp_threshold_10_total_metric": 0.19005000889301302, |
| "tpp_threshold_10_intended_diff_only": 0.2595000147819519, |
| "tpp_threshold_10_unintended_diff_only": 0.0694500058889389, |
| "tpp_threshold_20_total_metric": 0.2113750219345093, |
| "tpp_threshold_20_intended_diff_only": 0.3225000262260437, |
| "tpp_threshold_20_unintended_diff_only": 0.11112500429153442, |
| "tpp_threshold_50_total_metric": 0.2433750182390213, |
| "tpp_threshold_50_intended_diff_only": 0.40350002646446226, |
| "tpp_threshold_50_unintended_diff_only": 0.16012500822544096, |
| "tpp_threshold_100_total_metric": 0.2399250164628029, |
| "tpp_threshold_100_intended_diff_only": 0.4273000299930573, |
| "tpp_threshold_100_unintended_diff_only": 0.18737501353025438, |
| "tpp_threshold_500_total_metric": 0.1992250308394432, |
| "tpp_threshold_500_intended_diff_only": 0.4363000452518463, |
| "tpp_threshold_500_unintended_diff_only": 0.23707501441240308 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.035099995136260984, |
| "tpp_threshold_2_intended_diff_only": 0.04880000352859497, |
| "tpp_threshold_2_unintended_diff_only": 0.013700008392333984, |
| "tpp_threshold_5_total_metric": 0.22205002903938292, |
| "tpp_threshold_5_intended_diff_only": 0.28260003328323363, |
| "tpp_threshold_5_unintended_diff_only": 0.06055000424385071, |
| "tpp_threshold_10_total_metric": 0.26175002157688143, |
| "tpp_threshold_10_intended_diff_only": 0.37980003356933595, |
| "tpp_threshold_10_unintended_diff_only": 0.11805001199245453, |
| "tpp_threshold_20_total_metric": 0.23755002617836, |
| "tpp_threshold_20_intended_diff_only": 0.434000039100647, |
| "tpp_threshold_20_unintended_diff_only": 0.19645001292228698, |
| "tpp_threshold_50_total_metric": 0.1862000048160553, |
| "tpp_threshold_50_intended_diff_only": 0.4600000262260437, |
| "tpp_threshold_50_unintended_diff_only": 0.2738000214099884, |
| "tpp_threshold_100_total_metric": 0.13955002427101135, |
| "tpp_threshold_100_intended_diff_only": 0.463800048828125, |
| "tpp_threshold_100_unintended_diff_only": 0.32425002455711366, |
| "tpp_threshold_500_total_metric": 0.07320002615451812, |
| "tpp_threshold_500_intended_diff_only": 0.464400053024292, |
| "tpp_threshold_500_unintended_diff_only": 0.39120002686977384 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.014649999141693116, |
| "tpp_threshold_2_intended_diff_only": 0.018999993801116943, |
| "tpp_threshold_2_unintended_diff_only": 0.004349994659423828, |
| "tpp_threshold_5_total_metric": 0.046800005435943606, |
| "tpp_threshold_5_intended_diff_only": 0.05640000104904175, |
| "tpp_threshold_5_unintended_diff_only": 0.009599995613098145, |
| "tpp_threshold_10_total_metric": 0.11834999620914459, |
| "tpp_threshold_10_intended_diff_only": 0.13919999599456787, |
| "tpp_threshold_10_unintended_diff_only": 0.02084999978542328, |
| "tpp_threshold_20_total_metric": 0.18520001769065858, |
| "tpp_threshold_20_intended_diff_only": 0.21100001335144042, |
| "tpp_threshold_20_unintended_diff_only": 0.02579999566078186, |
| "tpp_threshold_50_total_metric": 0.3005500316619873, |
| "tpp_threshold_50_intended_diff_only": 0.3470000267028809, |
| "tpp_threshold_50_unintended_diff_only": 0.04644999504089355, |
| "tpp_threshold_100_total_metric": 0.3403000086545944, |
| "tpp_threshold_100_intended_diff_only": 0.3908000111579895, |
| "tpp_threshold_100_unintended_diff_only": 0.05050000250339508, |
| "tpp_threshold_500_total_metric": 0.32525003552436826, |
| "tpp_threshold_500_intended_diff_only": 0.40820003747940065, |
| "tpp_threshold_500_unintended_diff_only": 0.08295000195503235 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.1.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-snap-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.1.hook_resid_post", |
| "hook_layer": 1, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.027750015258789062, |
| "tpp_threshold_2_intended_diff_only": 0.03100001811981201, |
| "tpp_threshold_2_unintended_diff_only": 0.0032500028610229492, |
| "tpp_threshold_5_total_metric": 0.24875003099441528, |
| "tpp_threshold_5_intended_diff_only": 0.34400004148483276, |
| "tpp_threshold_5_unintended_diff_only": 0.09525001049041748, |
| "tpp_threshold_10_total_metric": 0.20725001394748688, |
| "tpp_threshold_10_intended_diff_only": 0.38700002431869507, |
| "tpp_threshold_10_unintended_diff_only": 0.1797500103712082, |
| "tpp_threshold_20_total_metric": 0.15825001895427704, |
| "tpp_threshold_20_intended_diff_only": 0.4180000424385071, |
| "tpp_threshold_20_unintended_diff_only": 0.25975002348423004, |
| "tpp_threshold_50_total_metric": 0.15349997580051422, |
| "tpp_threshold_50_intended_diff_only": 0.4399999976158142, |
| "tpp_threshold_50_unintended_diff_only": 0.2865000218153, |
| "tpp_threshold_100_total_metric": 0.11874999105930328, |
| "tpp_threshold_100_intended_diff_only": 0.44700002670288086, |
| "tpp_threshold_100_unintended_diff_only": 0.3282500356435776, |
| "tpp_threshold_500_total_metric": 0.05175001919269562, |
| "tpp_threshold_500_intended_diff_only": 0.4500000476837158, |
| "tpp_threshold_500_unintended_diff_only": 0.3982500284910202 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": -0.007749989628791809, |
| "tpp_threshold_2_intended_diff_only": -0.0009999871253967285, |
| "tpp_threshold_2_unintended_diff_only": 0.006750002503395081, |
| "tpp_threshold_5_total_metric": 0.19400005042552948, |
| "tpp_threshold_5_intended_diff_only": 0.2510000467300415, |
| "tpp_threshold_5_unintended_diff_only": 0.056999996304512024, |
| "tpp_threshold_10_total_metric": 0.23200002312660217, |
| "tpp_threshold_10_intended_diff_only": 0.30800002813339233, |
| "tpp_threshold_10_unintended_diff_only": 0.07600000500679016, |
| "tpp_threshold_20_total_metric": 0.22200004756450653, |
| "tpp_threshold_20_intended_diff_only": 0.3890000581741333, |
| "tpp_threshold_20_unintended_diff_only": 0.16700001060962677, |
| "tpp_threshold_50_total_metric": 0.19375000894069672, |
| "tpp_threshold_50_intended_diff_only": 0.45600003004074097, |
| "tpp_threshold_50_unintended_diff_only": 0.26225002110004425, |
| "tpp_threshold_100_total_metric": 0.14575006067752838, |
| "tpp_threshold_100_intended_diff_only": 0.4620000720024109, |
| "tpp_threshold_100_unintended_diff_only": 0.3162500113248825, |
| "tpp_threshold_500_total_metric": 0.06575004756450653, |
| "tpp_threshold_500_intended_diff_only": 0.4620000720024109, |
| "tpp_threshold_500_unintended_diff_only": 0.39625002443790436 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.018749967217445374, |
| "tpp_threshold_2_intended_diff_only": 0.018999993801116943, |
| "tpp_threshold_2_unintended_diff_only": 0.0002500265836715698, |
| "tpp_threshold_5_total_metric": 0.2095000445842743, |
| "tpp_threshold_5_intended_diff_only": 0.24100005626678467, |
| "tpp_threshold_5_unintended_diff_only": 0.031500011682510376, |
| "tpp_threshold_10_total_metric": 0.2995000332593918, |
| "tpp_threshold_10_intended_diff_only": 0.39900004863739014, |
| "tpp_threshold_10_unintended_diff_only": 0.09950001537799835, |
| "tpp_threshold_20_total_metric": 0.2757500410079956, |
| "tpp_threshold_20_intended_diff_only": 0.4320000410079956, |
| "tpp_threshold_20_unintended_diff_only": 0.15625, |
| "tpp_threshold_50_total_metric": 0.19625000655651093, |
| "tpp_threshold_50_intended_diff_only": 0.4480000138282776, |
| "tpp_threshold_50_unintended_diff_only": 0.25175000727176666, |
| "tpp_threshold_100_total_metric": 0.09900003671646118, |
| "tpp_threshold_100_intended_diff_only": 0.4490000605583191, |
| "tpp_threshold_100_unintended_diff_only": 0.3500000238418579, |
| "tpp_threshold_500_total_metric": 0.042250022292137146, |
| "tpp_threshold_500_intended_diff_only": 0.4490000605583191, |
| "tpp_threshold_500_unintended_diff_only": 0.40675003826618195 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.001249939203262329, |
| "tpp_threshold_2_intended_diff_only": 0.003999948501586914, |
| "tpp_threshold_2_unintended_diff_only": 0.002750009298324585, |
| "tpp_threshold_5_total_metric": 0.1682499647140503, |
| "tpp_threshold_5_intended_diff_only": 0.22899997234344482, |
| "tpp_threshold_5_unintended_diff_only": 0.06075000762939453, |
| "tpp_threshold_10_total_metric": 0.23125000298023224, |
| "tpp_threshold_10_intended_diff_only": 0.4010000228881836, |
| "tpp_threshold_10_unintended_diff_only": 0.16975001990795135, |
| "tpp_threshold_20_total_metric": 0.24474996328353882, |
| "tpp_threshold_20_intended_diff_only": 0.45899999141693115, |
| "tpp_threshold_20_unintended_diff_only": 0.21425002813339233, |
| "tpp_threshold_50_total_metric": 0.195250004529953, |
| "tpp_threshold_50_intended_diff_only": 0.484000027179718, |
| "tpp_threshold_50_unintended_diff_only": 0.288750022649765, |
| "tpp_threshold_100_total_metric": 0.16974999010562897, |
| "tpp_threshold_100_intended_diff_only": 0.48900002241134644, |
| "tpp_threshold_100_unintended_diff_only": 0.31925003230571747, |
| "tpp_threshold_500_total_metric": 0.11499999463558197, |
| "tpp_threshold_500_intended_diff_only": 0.48900002241134644, |
| "tpp_threshold_500_unintended_diff_only": 0.37400002777576447 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.13550004363059998, |
| "tpp_threshold_2_intended_diff_only": 0.1910000443458557, |
| "tpp_threshold_2_unintended_diff_only": 0.05550000071525574, |
| "tpp_threshold_5_total_metric": 0.2897500544786453, |
| "tpp_threshold_5_intended_diff_only": 0.34800004959106445, |
| "tpp_threshold_5_unintended_diff_only": 0.05824999511241913, |
| "tpp_threshold_10_total_metric": 0.33875003457069397, |
| "tpp_threshold_10_intended_diff_only": 0.40400004386901855, |
| "tpp_threshold_10_unintended_diff_only": 0.06525000929832458, |
| "tpp_threshold_20_total_metric": 0.28700006008148193, |
| "tpp_threshold_20_intended_diff_only": 0.4720000624656677, |
| "tpp_threshold_20_unintended_diff_only": 0.1850000023841858, |
| "tpp_threshold_50_total_metric": 0.19225002825260162, |
| "tpp_threshold_50_intended_diff_only": 0.4720000624656677, |
| "tpp_threshold_50_unintended_diff_only": 0.2797500342130661, |
| "tpp_threshold_100_total_metric": 0.16450004279613495, |
| "tpp_threshold_100_intended_diff_only": 0.4720000624656677, |
| "tpp_threshold_100_unintended_diff_only": 0.3075000196695328, |
| "tpp_threshold_500_total_metric": 0.09125004708766937, |
| "tpp_threshold_500_intended_diff_only": 0.4720000624656677, |
| "tpp_threshold_500_unintended_diff_only": 0.38075001537799835 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.016999974846839905, |
| "tpp_threshold_2_intended_diff_only": 0.019999980926513672, |
| "tpp_threshold_2_unintended_diff_only": 0.003000006079673767, |
| "tpp_threshold_5_total_metric": 0.030999988317489624, |
| "tpp_threshold_5_intended_diff_only": 0.0339999794960022, |
| "tpp_threshold_5_unintended_diff_only": 0.0029999911785125732, |
| "tpp_threshold_10_total_metric": 0.2659999877214432, |
| "tpp_threshold_10_intended_diff_only": 0.3149999976158142, |
| "tpp_threshold_10_unintended_diff_only": 0.04900000989437103, |
| "tpp_threshold_20_total_metric": 0.3384999781847, |
| "tpp_threshold_20_intended_diff_only": 0.390999972820282, |
| "tpp_threshold_20_unintended_diff_only": 0.05249999463558197, |
| "tpp_threshold_50_total_metric": 0.356750026345253, |
| "tpp_threshold_50_intended_diff_only": 0.4240000247955322, |
| "tpp_threshold_50_unintended_diff_only": 0.06724999845027924, |
| "tpp_threshold_100_total_metric": 0.35124996304512024, |
| "tpp_threshold_100_intended_diff_only": 0.43199998140335083, |
| "tpp_threshold_100_unintended_diff_only": 0.08075001835823059, |
| "tpp_threshold_500_total_metric": 0.34575001895427704, |
| "tpp_threshold_500_intended_diff_only": 0.43300002813339233, |
| "tpp_threshold_500_unintended_diff_only": 0.0872500091791153 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.013000041246414185, |
| "tpp_threshold_2_intended_diff_only": 0.017000019550323486, |
| "tpp_threshold_2_unintended_diff_only": 0.003999978303909302, |
| "tpp_threshold_5_total_metric": 0.046000003814697266, |
| "tpp_threshold_5_intended_diff_only": 0.06000000238418579, |
| "tpp_threshold_5_unintended_diff_only": 0.013999998569488525, |
| "tpp_threshold_10_total_metric": 0.06275001168251038, |
| "tpp_threshold_10_intended_diff_only": 0.078000009059906, |
| "tpp_threshold_10_unintended_diff_only": 0.01524999737739563, |
| "tpp_threshold_20_total_metric": 0.14875002205371857, |
| "tpp_threshold_20_intended_diff_only": 0.17400002479553223, |
| "tpp_threshold_20_unintended_diff_only": 0.02525000274181366, |
| "tpp_threshold_50_total_metric": 0.2825000435113907, |
| "tpp_threshold_50_intended_diff_only": 0.3720000386238098, |
| "tpp_threshold_50_unintended_diff_only": 0.08949999511241913, |
| "tpp_threshold_100_total_metric": 0.34250006079673767, |
| "tpp_threshold_100_intended_diff_only": 0.4270000457763672, |
| "tpp_threshold_100_unintended_diff_only": 0.08449998497962952, |
| "tpp_threshold_500_total_metric": 0.28825004398822784, |
| "tpp_threshold_500_intended_diff_only": 0.4320000410079956, |
| "tpp_threshold_500_unintended_diff_only": 0.14374999701976776 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": 0.012999966740608215, |
| "tpp_threshold_2_intended_diff_only": 0.011999964714050293, |
| "tpp_threshold_2_unintended_diff_only": -0.0010000020265579224, |
| "tpp_threshold_5_total_metric": 0.01975002884864807, |
| "tpp_threshold_5_intended_diff_only": 0.03100001811981201, |
| "tpp_threshold_5_unintended_diff_only": 0.01124998927116394, |
| "tpp_threshold_10_total_metric": 0.032999977469444275, |
| "tpp_threshold_10_intended_diff_only": 0.042999982833862305, |
| "tpp_threshold_10_unintended_diff_only": 0.01000000536441803, |
| "tpp_threshold_20_total_metric": 0.10350003838539124, |
| "tpp_threshold_20_intended_diff_only": 0.11400002241134644, |
| "tpp_threshold_20_unintended_diff_only": 0.0104999840259552, |
| "tpp_threshold_50_total_metric": 0.29100000858306885, |
| "tpp_threshold_50_intended_diff_only": 0.30400002002716064, |
| "tpp_threshold_50_unintended_diff_only": 0.013000011444091797, |
| "tpp_threshold_100_total_metric": 0.35249999165534973, |
| "tpp_threshold_100_intended_diff_only": 0.3659999966621399, |
| "tpp_threshold_100_unintended_diff_only": 0.013500005006790161, |
| "tpp_threshold_500_total_metric": 0.3537500351667404, |
| "tpp_threshold_500_intended_diff_only": 0.4050000309944153, |
| "tpp_threshold_500_unintended_diff_only": 0.051249995827674866 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.013000041246414185, |
| "tpp_threshold_2_intended_diff_only": 0.021000027656555176, |
| "tpp_threshold_2_unintended_diff_only": 0.007999986410140991, |
| "tpp_threshold_5_total_metric": 0.023250028491020203, |
| "tpp_threshold_5_intended_diff_only": 0.030000030994415283, |
| "tpp_threshold_5_unintended_diff_only": 0.006750002503395081, |
| "tpp_threshold_10_total_metric": 0.08850003778934479, |
| "tpp_threshold_10_intended_diff_only": 0.09500002861022949, |
| "tpp_threshold_10_unintended_diff_only": 0.006499990820884705, |
| "tpp_threshold_20_total_metric": 0.12925004959106445, |
| "tpp_threshold_20_intended_diff_only": 0.14500004053115845, |
| "tpp_threshold_20_unintended_diff_only": 0.015749990940093994, |
| "tpp_threshold_50_total_metric": 0.2845000773668289, |
| "tpp_threshold_50_intended_diff_only": 0.3060000538825989, |
| "tpp_threshold_50_unintended_diff_only": 0.02149997651576996, |
| "tpp_threshold_100_total_metric": 0.3435000628232956, |
| "tpp_threshold_100_intended_diff_only": 0.37000006437301636, |
| "tpp_threshold_100_unintended_diff_only": 0.026500001549720764, |
| "tpp_threshold_500_total_metric": 0.3612500876188278, |
| "tpp_threshold_500_intended_diff_only": 0.4110000729560852, |
| "tpp_threshold_500_unintended_diff_only": 0.049749985337257385 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.017249971628189087, |
| "tpp_threshold_2_intended_diff_only": 0.02499997615814209, |
| "tpp_threshold_2_unintended_diff_only": 0.007750004529953003, |
| "tpp_threshold_5_total_metric": 0.11399997770786285, |
| "tpp_threshold_5_intended_diff_only": 0.12699997425079346, |
| "tpp_threshold_5_unintended_diff_only": 0.012999996542930603, |
| "tpp_threshold_10_total_metric": 0.14149996638298035, |
| "tpp_threshold_10_intended_diff_only": 0.16499996185302734, |
| "tpp_threshold_10_unintended_diff_only": 0.023499995470046997, |
| "tpp_threshold_20_total_metric": 0.20600000023841858, |
| "tpp_threshold_20_intended_diff_only": 0.23100000619888306, |
| "tpp_threshold_20_unintended_diff_only": 0.025000005960464478, |
| "tpp_threshold_50_total_metric": 0.2880000025033951, |
| "tpp_threshold_50_intended_diff_only": 0.32899999618530273, |
| "tpp_threshold_50_unintended_diff_only": 0.040999993681907654, |
| "tpp_threshold_100_total_metric": 0.31174996495246887, |
| "tpp_threshold_100_intended_diff_only": 0.35899996757507324, |
| "tpp_threshold_100_unintended_diff_only": 0.04725000262260437, |
| "tpp_threshold_500_total_metric": 0.2772499918937683, |
| "tpp_threshold_500_intended_diff_only": 0.36000001430511475, |
| "tpp_threshold_500_unintended_diff_only": 0.08275002241134644 |
| } |
| } |
| } |
| } |