| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "65b37170-42a3-4f63-bfdc-b4728a912b3a", |
| "datetime_epoch_millis": 1745618916296, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.08267499208450318, |
| "tpp_threshold_2_intended_diff_only": 0.11179999709129333, |
| "tpp_threshold_2_unintended_diff_only": 0.02912500500679016, |
| "tpp_threshold_5_total_metric": 0.12242501080036162, |
| "tpp_threshold_5_intended_diff_only": 0.1674000144004822, |
| "tpp_threshold_5_unintended_diff_only": 0.04497500360012054, |
| "tpp_threshold_10_total_metric": 0.15242501646280288, |
| "tpp_threshold_10_intended_diff_only": 0.2254000186920166, |
| "tpp_threshold_10_unintended_diff_only": 0.07297500222921371, |
| "tpp_threshold_20_total_metric": 0.16577500402927398, |
| "tpp_threshold_20_intended_diff_only": 0.28600001335144043, |
| "tpp_threshold_20_unintended_diff_only": 0.12022500932216644, |
| "tpp_threshold_50_total_metric": 0.1967750072479248, |
| "tpp_threshold_50_intended_diff_only": 0.3609000205993652, |
| "tpp_threshold_50_unintended_diff_only": 0.16412501335144042, |
| "tpp_threshold_100_total_metric": 0.22020000964403152, |
| "tpp_threshold_100_intended_diff_only": 0.4108000218868255, |
| "tpp_threshold_100_unintended_diff_only": 0.19060001224279405, |
| "tpp_threshold_500_total_metric": 0.21022502779960633, |
| "tpp_threshold_500_intended_diff_only": 0.44210004806518555, |
| "tpp_threshold_500_unintended_diff_only": 0.2318750202655792 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.14239999353885652, |
| "tpp_threshold_2_intended_diff_only": 0.19759999513626098, |
| "tpp_threshold_2_unintended_diff_only": 0.05520000159740448, |
| "tpp_threshold_5_total_metric": 0.19339999854564666, |
| "tpp_threshold_5_intended_diff_only": 0.27760000228881837, |
| "tpp_threshold_5_unintended_diff_only": 0.08420000374317169, |
| "tpp_threshold_10_total_metric": 0.22930001020431517, |
| "tpp_threshold_10_intended_diff_only": 0.3658000111579895, |
| "tpp_threshold_10_unintended_diff_only": 0.13650000095367432, |
| "tpp_threshold_20_total_metric": 0.19795000553131104, |
| "tpp_threshold_20_intended_diff_only": 0.42300001382827757, |
| "tpp_threshold_20_unintended_diff_only": 0.22505000829696656, |
| "tpp_threshold_50_total_metric": 0.1563500016927719, |
| "tpp_threshold_50_intended_diff_only": 0.4552000164985657, |
| "tpp_threshold_50_unintended_diff_only": 0.29885001480579376, |
| "tpp_threshold_100_total_metric": 0.12785001397132872, |
| "tpp_threshold_100_intended_diff_only": 0.4632000207901001, |
| "tpp_threshold_100_unintended_diff_only": 0.33535000681877136, |
| "tpp_threshold_500_total_metric": 0.0754000186920166, |
| "tpp_threshold_500_intended_diff_only": 0.4670000433921814, |
| "tpp_threshold_500_unintended_diff_only": 0.3916000247001648 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.022949990630149842, |
| "tpp_threshold_2_intended_diff_only": 0.025999999046325682, |
| "tpp_threshold_2_unintended_diff_only": 0.0030500084161758424, |
| "tpp_threshold_5_total_metric": 0.0514500230550766, |
| "tpp_threshold_5_intended_diff_only": 0.05720002651214599, |
| "tpp_threshold_5_unintended_diff_only": 0.005750003457069397, |
| "tpp_threshold_10_total_metric": 0.07555002272129059, |
| "tpp_threshold_10_intended_diff_only": 0.0850000262260437, |
| "tpp_threshold_10_unintended_diff_only": 0.009450003504753113, |
| "tpp_threshold_20_total_metric": 0.13360000252723694, |
| "tpp_threshold_20_intended_diff_only": 0.14900001287460327, |
| "tpp_threshold_20_unintended_diff_only": 0.015400010347366332, |
| "tpp_threshold_50_total_metric": 0.2372000128030777, |
| "tpp_threshold_50_intended_diff_only": 0.2666000247001648, |
| "tpp_threshold_50_unintended_diff_only": 0.029400011897087096, |
| "tpp_threshold_100_total_metric": 0.3125500053167343, |
| "tpp_threshold_100_intended_diff_only": 0.358400022983551, |
| "tpp_threshold_100_unintended_diff_only": 0.04585001766681671, |
| "tpp_threshold_500_total_metric": 0.34505003690719604, |
| "tpp_threshold_500_intended_diff_only": 0.4172000527381897, |
| "tpp_threshold_500_unintended_diff_only": 0.07215001583099365 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.14.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-snap-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.14.hook_resid_post", |
| "hook_layer": 14, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.11625002324581146, |
| "tpp_threshold_2_intended_diff_only": 0.18800002336502075, |
| "tpp_threshold_2_unintended_diff_only": 0.07175000011920929, |
| "tpp_threshold_5_total_metric": 0.15724997222423553, |
| "tpp_threshold_5_intended_diff_only": 0.2329999804496765, |
| "tpp_threshold_5_unintended_diff_only": 0.07575000822544098, |
| "tpp_threshold_10_total_metric": 0.2124999612569809, |
| "tpp_threshold_10_intended_diff_only": 0.3489999771118164, |
| "tpp_threshold_10_unintended_diff_only": 0.1365000158548355, |
| "tpp_threshold_20_total_metric": 0.16474997997283936, |
| "tpp_threshold_20_intended_diff_only": 0.40799999237060547, |
| "tpp_threshold_20_unintended_diff_only": 0.2432500123977661, |
| "tpp_threshold_50_total_metric": 0.1655000001192093, |
| "tpp_threshold_50_intended_diff_only": 0.44300001859664917, |
| "tpp_threshold_50_unintended_diff_only": 0.2775000184774399, |
| "tpp_threshold_100_total_metric": 0.14174997806549072, |
| "tpp_threshold_100_intended_diff_only": 0.45399999618530273, |
| "tpp_threshold_100_unintended_diff_only": 0.312250018119812, |
| "tpp_threshold_500_total_metric": 0.06524999439716339, |
| "tpp_threshold_500_intended_diff_only": 0.45600003004074097, |
| "tpp_threshold_500_unintended_diff_only": 0.3907500356435776 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.15349997580051422, |
| "tpp_threshold_2_intended_diff_only": 0.24699997901916504, |
| "tpp_threshold_2_unintended_diff_only": 0.09350000321865082, |
| "tpp_threshold_5_total_metric": 0.18050000071525574, |
| "tpp_threshold_5_intended_diff_only": 0.29100000858306885, |
| "tpp_threshold_5_unintended_diff_only": 0.11050000786781311, |
| "tpp_threshold_10_total_metric": 0.19475001096725464, |
| "tpp_threshold_10_intended_diff_only": 0.3370000123977661, |
| "tpp_threshold_10_unintended_diff_only": 0.14225000143051147, |
| "tpp_threshold_20_total_metric": 0.1394999921321869, |
| "tpp_threshold_20_intended_diff_only": 0.3889999985694885, |
| "tpp_threshold_20_unintended_diff_only": 0.24950000643730164, |
| "tpp_threshold_50_total_metric": 0.13050001859664917, |
| "tpp_threshold_50_intended_diff_only": 0.437000036239624, |
| "tpp_threshold_50_unintended_diff_only": 0.30650001764297485, |
| "tpp_threshold_100_total_metric": 0.09574998915195465, |
| "tpp_threshold_100_intended_diff_only": 0.4490000009536743, |
| "tpp_threshold_100_unintended_diff_only": 0.35325001180171967, |
| "tpp_threshold_500_total_metric": 0.059250012040138245, |
| "tpp_threshold_500_intended_diff_only": 0.46000003814697266, |
| "tpp_threshold_500_unintended_diff_only": 0.4007500261068344 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.16324999928474426, |
| "tpp_threshold_2_intended_diff_only": 0.23100000619888306, |
| "tpp_threshold_2_unintended_diff_only": 0.0677500069141388, |
| "tpp_threshold_5_total_metric": 0.2067500352859497, |
| "tpp_threshold_5_intended_diff_only": 0.2850000262260437, |
| "tpp_threshold_5_unintended_diff_only": 0.078249990940094, |
| "tpp_threshold_10_total_metric": 0.1860000044107437, |
| "tpp_threshold_10_intended_diff_only": 0.3610000014305115, |
| "tpp_threshold_10_unintended_diff_only": 0.17499999701976776, |
| "tpp_threshold_20_total_metric": 0.1627500355243683, |
| "tpp_threshold_20_intended_diff_only": 0.40400004386901855, |
| "tpp_threshold_20_unintended_diff_only": 0.24125000834465027, |
| "tpp_threshold_50_total_metric": 0.1327500194311142, |
| "tpp_threshold_50_intended_diff_only": 0.44200003147125244, |
| "tpp_threshold_50_unintended_diff_only": 0.30925001204013824, |
| "tpp_threshold_100_total_metric": 0.10500004887580872, |
| "tpp_threshold_100_intended_diff_only": 0.4540000557899475, |
| "tpp_threshold_100_unintended_diff_only": 0.3490000069141388, |
| "tpp_threshold_500_total_metric": 0.043500036001205444, |
| "tpp_threshold_500_intended_diff_only": 0.4540000557899475, |
| "tpp_threshold_500_unintended_diff_only": 0.41050001978874207 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.09424999356269836, |
| "tpp_threshold_2_intended_diff_only": 0.1029999852180481, |
| "tpp_threshold_2_unintended_diff_only": 0.008749991655349731, |
| "tpp_threshold_5_total_metric": 0.18775001168251038, |
| "tpp_threshold_5_intended_diff_only": 0.30000001192092896, |
| "tpp_threshold_5_unintended_diff_only": 0.11225000023841858, |
| "tpp_threshold_10_total_metric": 0.2680000364780426, |
| "tpp_threshold_10_intended_diff_only": 0.437000036239624, |
| "tpp_threshold_10_unintended_diff_only": 0.16899999976158142, |
| "tpp_threshold_20_total_metric": 0.24675001204013824, |
| "tpp_threshold_20_intended_diff_only": 0.4750000238418579, |
| "tpp_threshold_20_unintended_diff_only": 0.22825001180171967, |
| "tpp_threshold_50_total_metric": 0.16624999046325684, |
| "tpp_threshold_50_intended_diff_only": 0.48100000619888306, |
| "tpp_threshold_50_unintended_diff_only": 0.3147500157356262, |
| "tpp_threshold_100_total_metric": 0.138000026345253, |
| "tpp_threshold_100_intended_diff_only": 0.48500001430511475, |
| "tpp_threshold_100_unintended_diff_only": 0.34699998795986176, |
| "tpp_threshold_500_total_metric": 0.1170000433921814, |
| "tpp_threshold_500_intended_diff_only": 0.49100005626678467, |
| "tpp_threshold_500_unintended_diff_only": 0.37400001287460327 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.18474997580051422, |
| "tpp_threshold_2_intended_diff_only": 0.218999981880188, |
| "tpp_threshold_2_unintended_diff_only": 0.03425000607967377, |
| "tpp_threshold_5_total_metric": 0.23474997282028198, |
| "tpp_threshold_5_intended_diff_only": 0.2789999842643738, |
| "tpp_threshold_5_unintended_diff_only": 0.0442500114440918, |
| "tpp_threshold_10_total_metric": 0.2852500379085541, |
| "tpp_threshold_10_intended_diff_only": 0.3450000286102295, |
| "tpp_threshold_10_unintended_diff_only": 0.059749990701675415, |
| "tpp_threshold_20_total_metric": 0.2760000079870224, |
| "tpp_threshold_20_intended_diff_only": 0.4390000104904175, |
| "tpp_threshold_20_unintended_diff_only": 0.16300000250339508, |
| "tpp_threshold_50_total_metric": 0.18674997985363007, |
| "tpp_threshold_50_intended_diff_only": 0.4729999899864197, |
| "tpp_threshold_50_unintended_diff_only": 0.2862500101327896, |
| "tpp_threshold_100_total_metric": 0.1587500274181366, |
| "tpp_threshold_100_intended_diff_only": 0.4740000367164612, |
| "tpp_threshold_100_unintended_diff_only": 0.3152500092983246, |
| "tpp_threshold_500_total_metric": 0.09200000762939453, |
| "tpp_threshold_500_intended_diff_only": 0.4740000367164612, |
| "tpp_threshold_500_unintended_diff_only": 0.38200002908706665 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.008999958634376526, |
| "tpp_threshold_2_intended_diff_only": 0.010999977588653564, |
| "tpp_threshold_2_unintended_diff_only": 0.0020000189542770386, |
| "tpp_threshold_5_total_metric": 0.007749974727630615, |
| "tpp_threshold_5_intended_diff_only": 0.013999998569488525, |
| "tpp_threshold_5_unintended_diff_only": 0.00625002384185791, |
| "tpp_threshold_10_total_metric": 0.012749969959259033, |
| "tpp_threshold_10_intended_diff_only": 0.019999980926513672, |
| "tpp_threshold_10_unintended_diff_only": 0.007250010967254639, |
| "tpp_threshold_20_total_metric": 0.028499916195869446, |
| "tpp_threshold_20_intended_diff_only": 0.04099994897842407, |
| "tpp_threshold_20_unintended_diff_only": 0.012500032782554626, |
| "tpp_threshold_50_total_metric": 0.1094999611377716, |
| "tpp_threshold_50_intended_diff_only": 0.14499998092651367, |
| "tpp_threshold_50_unintended_diff_only": 0.035500019788742065, |
| "tpp_threshold_100_total_metric": 0.27299994230270386, |
| "tpp_threshold_100_intended_diff_only": 0.312999963760376, |
| "tpp_threshold_100_unintended_diff_only": 0.04000002145767212, |
| "tpp_threshold_500_total_metric": 0.39399999380111694, |
| "tpp_threshold_500_intended_diff_only": 0.4520000219345093, |
| "tpp_threshold_500_unintended_diff_only": 0.058000028133392334 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.03625001013278961, |
| "tpp_threshold_2_intended_diff_only": 0.04100000858306885, |
| "tpp_threshold_2_unintended_diff_only": 0.004749998450279236, |
| "tpp_threshold_5_total_metric": 0.07475003600120544, |
| "tpp_threshold_5_intended_diff_only": 0.08100003004074097, |
| "tpp_threshold_5_unintended_diff_only": 0.0062499940395355225, |
| "tpp_threshold_10_total_metric": 0.11275003850460052, |
| "tpp_threshold_10_intended_diff_only": 0.1260000467300415, |
| "tpp_threshold_10_unintended_diff_only": 0.013250008225440979, |
| "tpp_threshold_20_total_metric": 0.1562500149011612, |
| "tpp_threshold_20_intended_diff_only": 0.18000000715255737, |
| "tpp_threshold_20_unintended_diff_only": 0.02374999225139618, |
| "tpp_threshold_50_total_metric": 0.2967500388622284, |
| "tpp_threshold_50_intended_diff_only": 0.3400000333786011, |
| "tpp_threshold_50_unintended_diff_only": 0.04324999451637268, |
| "tpp_threshold_100_total_metric": 0.3335000276565552, |
| "tpp_threshold_100_intended_diff_only": 0.4270000457763672, |
| "tpp_threshold_100_unintended_diff_only": 0.09350001811981201, |
| "tpp_threshold_500_total_metric": 0.3082500547170639, |
| "tpp_threshold_500_intended_diff_only": 0.43000006675720215, |
| "tpp_threshold_500_unintended_diff_only": 0.12175001204013824 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": 0.0012500137090682983, |
| "tpp_threshold_2_intended_diff_only": 0.003000020980834961, |
| "tpp_threshold_2_unintended_diff_only": 0.0017500072717666626, |
| "tpp_threshold_5_total_metric": 0.026500031352043152, |
| "tpp_threshold_5_intended_diff_only": 0.03900003433227539, |
| "tpp_threshold_5_unintended_diff_only": 0.012500002980232239, |
| "tpp_threshold_10_total_metric": 0.049750059843063354, |
| "tpp_threshold_10_intended_diff_only": 0.061000049114227295, |
| "tpp_threshold_10_unintended_diff_only": 0.01124998927116394, |
| "tpp_threshold_20_total_metric": 0.09600001573562622, |
| "tpp_threshold_20_intended_diff_only": 0.11300003528594971, |
| "tpp_threshold_20_unintended_diff_only": 0.017000019550323486, |
| "tpp_threshold_50_total_metric": 0.21575003862380981, |
| "tpp_threshold_50_intended_diff_only": 0.23700004816055298, |
| "tpp_threshold_50_unintended_diff_only": 0.021250009536743164, |
| "tpp_threshold_100_total_metric": 0.3122500032186508, |
| "tpp_threshold_100_intended_diff_only": 0.3360000252723694, |
| "tpp_threshold_100_unintended_diff_only": 0.023750022053718567, |
| "tpp_threshold_500_total_metric": 0.3537500500679016, |
| "tpp_threshold_500_intended_diff_only": 0.42100006341934204, |
| "tpp_threshold_500_unintended_diff_only": 0.06725001335144043 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.030750006437301636, |
| "tpp_threshold_2_intended_diff_only": 0.03600001335144043, |
| "tpp_threshold_2_unintended_diff_only": 0.005250006914138794, |
| "tpp_threshold_5_total_metric": 0.07475005090236664, |
| "tpp_threshold_5_intended_diff_only": 0.08000004291534424, |
| "tpp_threshold_5_unintended_diff_only": 0.0052499920129776, |
| "tpp_threshold_10_total_metric": 0.10100005567073822, |
| "tpp_threshold_10_intended_diff_only": 0.10700005292892456, |
| "tpp_threshold_10_unintended_diff_only": 0.00599999725818634, |
| "tpp_threshold_20_total_metric": 0.1962500661611557, |
| "tpp_threshold_20_intended_diff_only": 0.2080000638961792, |
| "tpp_threshold_20_unintended_diff_only": 0.011749997735023499, |
| "tpp_threshold_50_total_metric": 0.25600001215934753, |
| "tpp_threshold_50_intended_diff_only": 0.2710000276565552, |
| "tpp_threshold_50_unintended_diff_only": 0.015000015497207642, |
| "tpp_threshold_100_total_metric": 0.32575003802776337, |
| "tpp_threshold_100_intended_diff_only": 0.35300004482269287, |
| "tpp_threshold_100_unintended_diff_only": 0.027250006794929504, |
| "tpp_threshold_500_total_metric": 0.3800000548362732, |
| "tpp_threshold_500_intended_diff_only": 0.4160000681877136, |
| "tpp_threshold_500_unintended_diff_only": 0.03600001335144043 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.037499964237213135, |
| "tpp_threshold_2_intended_diff_only": 0.038999974727630615, |
| "tpp_threshold_2_unintended_diff_only": 0.0015000104904174805, |
| "tpp_threshold_5_total_metric": 0.07350002229213715, |
| "tpp_threshold_5_intended_diff_only": 0.07200002670288086, |
| "tpp_threshold_5_unintended_diff_only": -0.0014999955892562866, |
| "tpp_threshold_10_total_metric": 0.10149998962879181, |
| "tpp_threshold_10_intended_diff_only": 0.11100000143051147, |
| "tpp_threshold_10_unintended_diff_only": 0.009500011801719666, |
| "tpp_threshold_20_total_metric": 0.19099999964237213, |
| "tpp_threshold_20_intended_diff_only": 0.203000009059906, |
| "tpp_threshold_20_unintended_diff_only": 0.012000009417533875, |
| "tpp_threshold_50_total_metric": 0.30800001323223114, |
| "tpp_threshold_50_intended_diff_only": 0.3400000333786011, |
| "tpp_threshold_50_unintended_diff_only": 0.032000020146369934, |
| "tpp_threshold_100_total_metric": 0.31825001537799835, |
| "tpp_threshold_100_intended_diff_only": 0.3630000352859497, |
| "tpp_threshold_100_unintended_diff_only": 0.044750019907951355, |
| "tpp_threshold_500_total_metric": 0.2892500311136246, |
| "tpp_threshold_500_intended_diff_only": 0.3670000433921814, |
| "tpp_threshold_500_unintended_diff_only": 0.07775001227855682 |
| } |
| } |
| } |
| } |