| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "65b37170-42a3-4f63-bfdc-b4728a912b3a", |
| "datetime_epoch_millis": 1745619989291, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.06834999024868012, |
| "tpp_threshold_2_intended_diff_only": 0.0835999846458435, |
| "tpp_threshold_2_unintended_diff_only": 0.015249994397163392, |
| "tpp_threshold_5_total_metric": 0.15797501504421235, |
| "tpp_threshold_5_intended_diff_only": 0.19630001187324525, |
| "tpp_threshold_5_unintended_diff_only": 0.0383249968290329, |
| "tpp_threshold_10_total_metric": 0.18905001282691958, |
| "tpp_threshold_10_intended_diff_only": 0.2542000114917755, |
| "tpp_threshold_10_unintended_diff_only": 0.06514999866485596, |
| "tpp_threshold_20_total_metric": 0.18907500952482223, |
| "tpp_threshold_20_intended_diff_only": 0.3008000075817108, |
| "tpp_threshold_20_unintended_diff_only": 0.11172499805688858, |
| "tpp_threshold_50_total_metric": 0.22695002406835557, |
| "tpp_threshold_50_intended_diff_only": 0.38240002393722533, |
| "tpp_threshold_50_unintended_diff_only": 0.15544999986886976, |
| "tpp_threshold_100_total_metric": 0.24377500563859938, |
| "tpp_threshold_100_intended_diff_only": 0.4292000114917755, |
| "tpp_threshold_100_unintended_diff_only": 0.18542500585317612, |
| "tpp_threshold_500_total_metric": 0.21352503299713135, |
| "tpp_threshold_500_intended_diff_only": 0.45220003724098207, |
| "tpp_threshold_500_unintended_diff_only": 0.2386750042438507 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.11694998741149902, |
| "tpp_threshold_2_intended_diff_only": 0.13879998922348022, |
| "tpp_threshold_2_unintended_diff_only": 0.021850001811981202, |
| "tpp_threshold_5_total_metric": 0.2637000232934952, |
| "tpp_threshold_5_intended_diff_only": 0.32420002222061156, |
| "tpp_threshold_5_unintended_diff_only": 0.0604999989271164, |
| "tpp_threshold_10_total_metric": 0.2847500115633011, |
| "tpp_threshold_10_intended_diff_only": 0.3958000183105469, |
| "tpp_threshold_10_unintended_diff_only": 0.11105000674724579, |
| "tpp_threshold_20_total_metric": 0.23505001068115233, |
| "tpp_threshold_20_intended_diff_only": 0.43280001878738406, |
| "tpp_threshold_20_unintended_diff_only": 0.1977500081062317, |
| "tpp_threshold_50_total_metric": 0.1851000279188156, |
| "tpp_threshold_50_intended_diff_only": 0.4618000388145447, |
| "tpp_threshold_50_unintended_diff_only": 0.27670001089572904, |
| "tpp_threshold_100_total_metric": 0.15394999384880065, |
| "tpp_threshold_100_intended_diff_only": 0.46560001373291016, |
| "tpp_threshold_100_unintended_diff_only": 0.3116500198841095, |
| "tpp_threshold_500_total_metric": 0.09100003242492676, |
| "tpp_threshold_500_intended_diff_only": 0.46680004596710206, |
| "tpp_threshold_500_unintended_diff_only": 0.3758000135421753 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.019749993085861207, |
| "tpp_threshold_2_intended_diff_only": 0.028399980068206786, |
| "tpp_threshold_2_unintended_diff_only": 0.008649986982345582, |
| "tpp_threshold_5_total_metric": 0.052250006794929506, |
| "tpp_threshold_5_intended_diff_only": 0.06840000152587891, |
| "tpp_threshold_5_unintended_diff_only": 0.016149994730949403, |
| "tpp_threshold_10_total_metric": 0.09335001409053803, |
| "tpp_threshold_10_intended_diff_only": 0.11260000467300416, |
| "tpp_threshold_10_unintended_diff_only": 0.019249990582466125, |
| "tpp_threshold_20_total_metric": 0.14310000836849213, |
| "tpp_threshold_20_intended_diff_only": 0.1687999963760376, |
| "tpp_threshold_20_unintended_diff_only": 0.02569998800754547, |
| "tpp_threshold_50_total_metric": 0.2688000202178955, |
| "tpp_threshold_50_intended_diff_only": 0.303000009059906, |
| "tpp_threshold_50_unintended_diff_only": 0.0341999888420105, |
| "tpp_threshold_100_total_metric": 0.3336000174283981, |
| "tpp_threshold_100_intended_diff_only": 0.39280000925064085, |
| "tpp_threshold_100_unintended_diff_only": 0.05919999182224274, |
| "tpp_threshold_500_total_metric": 0.33605003356933594, |
| "tpp_threshold_500_intended_diff_only": 0.43760002851486207, |
| "tpp_threshold_500_unintended_diff_only": 0.10154999494552612 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.20.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-snap-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.20.hook_resid_post", |
| "hook_layer": 20, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.2072499841451645, |
| "tpp_threshold_2_intended_diff_only": 0.2639999985694885, |
| "tpp_threshold_2_unintended_diff_only": 0.056750014424324036, |
| "tpp_threshold_5_total_metric": 0.2617500126361847, |
| "tpp_threshold_5_intended_diff_only": 0.3460000157356262, |
| "tpp_threshold_5_unintended_diff_only": 0.08425000309944153, |
| "tpp_threshold_10_total_metric": 0.2797500491142273, |
| "tpp_threshold_10_intended_diff_only": 0.3760000467300415, |
| "tpp_threshold_10_unintended_diff_only": 0.09624999761581421, |
| "tpp_threshold_20_total_metric": 0.20875000953674316, |
| "tpp_threshold_20_intended_diff_only": 0.42000001668930054, |
| "tpp_threshold_20_unintended_diff_only": 0.21125000715255737, |
| "tpp_threshold_50_total_metric": 0.14775000512599945, |
| "tpp_threshold_50_intended_diff_only": 0.44300001859664917, |
| "tpp_threshold_50_unintended_diff_only": 0.2952500134706497, |
| "tpp_threshold_100_total_metric": 0.1184999942779541, |
| "tpp_threshold_100_intended_diff_only": 0.4480000138282776, |
| "tpp_threshold_100_unintended_diff_only": 0.3295000195503235, |
| "tpp_threshold_500_total_metric": 0.055750057101249695, |
| "tpp_threshold_500_intended_diff_only": 0.4490000605583191, |
| "tpp_threshold_500_unintended_diff_only": 0.3932500034570694 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.12600000202655792, |
| "tpp_threshold_2_intended_diff_only": 0.15799999237060547, |
| "tpp_threshold_2_unintended_diff_only": 0.031999990344047546, |
| "tpp_threshold_5_total_metric": 0.1975000500679016, |
| "tpp_threshold_5_intended_diff_only": 0.2510000467300415, |
| "tpp_threshold_5_unintended_diff_only": 0.05349999666213989, |
| "tpp_threshold_10_total_metric": 0.21300001442432404, |
| "tpp_threshold_10_intended_diff_only": 0.34200000762939453, |
| "tpp_threshold_10_unintended_diff_only": 0.1289999932050705, |
| "tpp_threshold_20_total_metric": 0.19225002825260162, |
| "tpp_threshold_20_intended_diff_only": 0.3960000276565552, |
| "tpp_threshold_20_unintended_diff_only": 0.20374999940395355, |
| "tpp_threshold_50_total_metric": 0.17050005495548248, |
| "tpp_threshold_50_intended_diff_only": 0.4490000605583191, |
| "tpp_threshold_50_unintended_diff_only": 0.2785000056028366, |
| "tpp_threshold_100_total_metric": 0.11775000393390656, |
| "tpp_threshold_100_intended_diff_only": 0.4610000252723694, |
| "tpp_threshold_100_unintended_diff_only": 0.34325002133846283, |
| "tpp_threshold_500_total_metric": 0.06300005316734314, |
| "tpp_threshold_500_intended_diff_only": 0.4630000591278076, |
| "tpp_threshold_500_unintended_diff_only": 0.4000000059604645 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.06599998474121094, |
| "tpp_threshold_2_intended_diff_only": 0.07499998807907104, |
| "tpp_threshold_2_unintended_diff_only": 0.009000003337860107, |
| "tpp_threshold_5_total_metric": 0.229249969124794, |
| "tpp_threshold_5_intended_diff_only": 0.32999998331069946, |
| "tpp_threshold_5_unintended_diff_only": 0.10075001418590546, |
| "tpp_threshold_10_total_metric": 0.2177499681711197, |
| "tpp_threshold_10_intended_diff_only": 0.38499999046325684, |
| "tpp_threshold_10_unintended_diff_only": 0.16725002229213715, |
| "tpp_threshold_20_total_metric": 0.18924999237060547, |
| "tpp_threshold_20_intended_diff_only": 0.41200000047683716, |
| "tpp_threshold_20_unintended_diff_only": 0.2227500081062317, |
| "tpp_threshold_50_total_metric": 0.14800001680850983, |
| "tpp_threshold_50_intended_diff_only": 0.44700002670288086, |
| "tpp_threshold_50_unintended_diff_only": 0.29900000989437103, |
| "tpp_threshold_100_total_metric": 0.11549995839595795, |
| "tpp_threshold_100_intended_diff_only": 0.44999998807907104, |
| "tpp_threshold_100_unintended_diff_only": 0.3345000296831131, |
| "tpp_threshold_500_total_metric": 0.056249991059303284, |
| "tpp_threshold_500_intended_diff_only": 0.4520000219345093, |
| "tpp_threshold_500_unintended_diff_only": 0.395750030875206 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.16249999403953552, |
| "tpp_threshold_2_intended_diff_only": 0.16699999570846558, |
| "tpp_threshold_2_unintended_diff_only": 0.004500001668930054, |
| "tpp_threshold_5_total_metric": 0.39225006103515625, |
| "tpp_threshold_5_intended_diff_only": 0.41700005531311035, |
| "tpp_threshold_5_unintended_diff_only": 0.0247499942779541, |
| "tpp_threshold_10_total_metric": 0.41325005888938904, |
| "tpp_threshold_10_intended_diff_only": 0.48100006580352783, |
| "tpp_threshold_10_unintended_diff_only": 0.0677500069141388, |
| "tpp_threshold_20_total_metric": 0.3202500492334366, |
| "tpp_threshold_20_intended_diff_only": 0.49500006437301636, |
| "tpp_threshold_20_unintended_diff_only": 0.17475001513957977, |
| "tpp_threshold_50_total_metric": 0.24375005066394806, |
| "tpp_threshold_50_intended_diff_only": 0.49500006437301636, |
| "tpp_threshold_50_unintended_diff_only": 0.2512500137090683, |
| "tpp_threshold_100_total_metric": 0.24575002491474152, |
| "tpp_threshold_100_intended_diff_only": 0.49400001764297485, |
| "tpp_threshold_100_unintended_diff_only": 0.24824999272823334, |
| "tpp_threshold_500_total_metric": 0.16350005567073822, |
| "tpp_threshold_500_intended_diff_only": 0.49500006437301636, |
| "tpp_threshold_500_unintended_diff_only": 0.33150000870227814 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.022999972105026245, |
| "tpp_threshold_2_intended_diff_only": 0.029999971389770508, |
| "tpp_threshold_2_unintended_diff_only": 0.006999999284744263, |
| "tpp_threshold_5_total_metric": 0.23775002360343933, |
| "tpp_threshold_5_intended_diff_only": 0.2770000100135803, |
| "tpp_threshold_5_unintended_diff_only": 0.03924998641014099, |
| "tpp_threshold_10_total_metric": 0.2999999672174454, |
| "tpp_threshold_10_intended_diff_only": 0.39499998092651367, |
| "tpp_threshold_10_unintended_diff_only": 0.0950000137090683, |
| "tpp_threshold_20_total_metric": 0.2647499740123749, |
| "tpp_threshold_20_intended_diff_only": 0.44099998474121094, |
| "tpp_threshold_20_unintended_diff_only": 0.17625001072883606, |
| "tpp_threshold_50_total_metric": 0.21550001204013824, |
| "tpp_threshold_50_intended_diff_only": 0.4750000238418579, |
| "tpp_threshold_50_unintended_diff_only": 0.25950001180171967, |
| "tpp_threshold_100_total_metric": 0.17224998772144318, |
| "tpp_threshold_100_intended_diff_only": 0.4750000238418579, |
| "tpp_threshold_100_unintended_diff_only": 0.30275003612041473, |
| "tpp_threshold_500_total_metric": 0.11650000512599945, |
| "tpp_threshold_500_intended_diff_only": 0.4750000238418579, |
| "tpp_threshold_500_unintended_diff_only": 0.35850001871585846 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": -0.0045000165700912476, |
| "tpp_threshold_2_intended_diff_only": 0.001999974250793457, |
| "tpp_threshold_2_unintended_diff_only": 0.006499990820884705, |
| "tpp_threshold_5_total_metric": 0.00024999678134918213, |
| "tpp_threshold_5_intended_diff_only": 0.009999990463256836, |
| "tpp_threshold_5_unintended_diff_only": 0.009749993681907654, |
| "tpp_threshold_10_total_metric": 0.009000018239021301, |
| "tpp_threshold_10_intended_diff_only": 0.023000001907348633, |
| "tpp_threshold_10_unintended_diff_only": 0.013999983668327332, |
| "tpp_threshold_20_total_metric": 0.036250039935112, |
| "tpp_threshold_20_intended_diff_only": 0.050000011920928955, |
| "tpp_threshold_20_unintended_diff_only": 0.013749971985816956, |
| "tpp_threshold_50_total_metric": 0.1352500021457672, |
| "tpp_threshold_50_intended_diff_only": 0.15799999237060547, |
| "tpp_threshold_50_unintended_diff_only": 0.022749990224838257, |
| "tpp_threshold_100_total_metric": 0.2795000374317169, |
| "tpp_threshold_100_intended_diff_only": 0.30400002002716064, |
| "tpp_threshold_100_unintended_diff_only": 0.024499982595443726, |
| "tpp_threshold_500_total_metric": 0.4072500318288803, |
| "tpp_threshold_500_intended_diff_only": 0.453000009059906, |
| "tpp_threshold_500_unintended_diff_only": 0.045749977231025696 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.04825003445148468, |
| "tpp_threshold_2_intended_diff_only": 0.06800001859664917, |
| "tpp_threshold_2_unintended_diff_only": 0.01974998414516449, |
| "tpp_threshold_5_total_metric": 0.12025003135204315, |
| "tpp_threshold_5_intended_diff_only": 0.1420000195503235, |
| "tpp_threshold_5_unintended_diff_only": 0.021749988198280334, |
| "tpp_threshold_10_total_metric": 0.2045000195503235, |
| "tpp_threshold_10_intended_diff_only": 0.23000001907348633, |
| "tpp_threshold_10_unintended_diff_only": 0.025499999523162842, |
| "tpp_threshold_20_total_metric": 0.2879999876022339, |
| "tpp_threshold_20_intended_diff_only": 0.3199999928474426, |
| "tpp_threshold_20_unintended_diff_only": 0.03200000524520874, |
| "tpp_threshold_50_total_metric": 0.4022500216960907, |
| "tpp_threshold_50_intended_diff_only": 0.44300001859664917, |
| "tpp_threshold_50_unintended_diff_only": 0.04074999690055847, |
| "tpp_threshold_100_total_metric": 0.3904999941587448, |
| "tpp_threshold_100_intended_diff_only": 0.44999998807907104, |
| "tpp_threshold_100_unintended_diff_only": 0.05949999392032623, |
| "tpp_threshold_500_total_metric": 0.3707500398159027, |
| "tpp_threshold_500_intended_diff_only": 0.45100003480911255, |
| "tpp_threshold_500_unintended_diff_only": 0.08024999499320984 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": -1.4901161193847656e-08, |
| "tpp_threshold_2_intended_diff_only": 0.001999974250793457, |
| "tpp_threshold_2_unintended_diff_only": 0.001999989151954651, |
| "tpp_threshold_5_total_metric": -0.001749977469444275, |
| "tpp_threshold_5_intended_diff_only": 0.009000003337860107, |
| "tpp_threshold_5_unintended_diff_only": 0.010749980807304382, |
| "tpp_threshold_10_total_metric": 0.007249996066093445, |
| "tpp_threshold_10_intended_diff_only": 0.018999993801116943, |
| "tpp_threshold_10_unintended_diff_only": 0.011749997735023499, |
| "tpp_threshold_20_total_metric": 0.05824999511241913, |
| "tpp_threshold_20_intended_diff_only": 0.07999998331069946, |
| "tpp_threshold_20_unintended_diff_only": 0.021749988198280334, |
| "tpp_threshold_50_total_metric": 0.20674999058246613, |
| "tpp_threshold_50_intended_diff_only": 0.23799997568130493, |
| "tpp_threshold_50_unintended_diff_only": 0.031249985098838806, |
| "tpp_threshold_100_total_metric": 0.2902500033378601, |
| "tpp_threshold_100_intended_diff_only": 0.40700000524520874, |
| "tpp_threshold_100_unintended_diff_only": 0.11675000190734863, |
| "tpp_threshold_500_total_metric": 0.2900000214576721, |
| "tpp_threshold_500_intended_diff_only": 0.437000036239624, |
| "tpp_threshold_500_unintended_diff_only": 0.1470000147819519 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.035499975085258484, |
| "tpp_threshold_2_intended_diff_only": 0.04499995708465576, |
| "tpp_threshold_2_unintended_diff_only": 0.009499981999397278, |
| "tpp_threshold_5_total_metric": 0.10374999046325684, |
| "tpp_threshold_5_intended_diff_only": 0.12099999189376831, |
| "tpp_threshold_5_unintended_diff_only": 0.017250001430511475, |
| "tpp_threshold_10_total_metric": 0.1744999885559082, |
| "tpp_threshold_10_intended_diff_only": 0.19099998474121094, |
| "tpp_threshold_10_unintended_diff_only": 0.016499996185302734, |
| "tpp_threshold_20_total_metric": 0.2122500240802765, |
| "tpp_threshold_20_intended_diff_only": 0.24000000953674316, |
| "tpp_threshold_20_unintended_diff_only": 0.027749985456466675, |
| "tpp_threshold_50_total_metric": 0.302000030875206, |
| "tpp_threshold_50_intended_diff_only": 0.3360000252723694, |
| "tpp_threshold_50_unintended_diff_only": 0.03399999439716339, |
| "tpp_threshold_100_total_metric": 0.36225004494190216, |
| "tpp_threshold_100_intended_diff_only": 0.4050000309944153, |
| "tpp_threshold_100_unintended_diff_only": 0.04274998605251312, |
| "tpp_threshold_500_total_metric": 0.29500003159046173, |
| "tpp_threshold_500_intended_diff_only": 0.43300002813339233, |
| "tpp_threshold_500_unintended_diff_only": 0.1379999965429306 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.019499987363815308, |
| "tpp_threshold_2_intended_diff_only": 0.02499997615814209, |
| "tpp_threshold_2_unintended_diff_only": 0.005499988794326782, |
| "tpp_threshold_5_total_metric": 0.03874999284744263, |
| "tpp_threshold_5_intended_diff_only": 0.06000000238418579, |
| "tpp_threshold_5_unintended_diff_only": 0.021250009536743164, |
| "tpp_threshold_10_total_metric": 0.07150004804134369, |
| "tpp_threshold_10_intended_diff_only": 0.10000002384185791, |
| "tpp_threshold_10_unintended_diff_only": 0.02849997580051422, |
| "tpp_threshold_20_total_metric": 0.12074999511241913, |
| "tpp_threshold_20_intended_diff_only": 0.15399998426437378, |
| "tpp_threshold_20_unintended_diff_only": 0.03324998915195465, |
| "tpp_threshold_50_total_metric": 0.2977500557899475, |
| "tpp_threshold_50_intended_diff_only": 0.3400000333786011, |
| "tpp_threshold_50_unintended_diff_only": 0.042249977588653564, |
| "tpp_threshold_100_total_metric": 0.34550000727176666, |
| "tpp_threshold_100_intended_diff_only": 0.39800000190734863, |
| "tpp_threshold_100_unintended_diff_only": 0.05249999463558197, |
| "tpp_threshold_500_total_metric": 0.3172500431537628, |
| "tpp_threshold_500_intended_diff_only": 0.4140000343322754, |
| "tpp_threshold_500_unintended_diff_only": 0.09674999117851257 |
| } |
| } |
| } |
| } |