| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "91ffb92c-ec01-47b1-9f45-773ec12047b3", |
| "datetime_epoch_millis": 1745753076271, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.07927500754594802, |
| "tpp_threshold_2_intended_diff_only": 0.09990001320838929, |
| "tpp_threshold_2_unintended_diff_only": 0.020625005662441253, |
| "tpp_threshold_5_total_metric": 0.14195000529289245, |
| "tpp_threshold_5_intended_diff_only": 0.1864000141620636, |
| "tpp_threshold_5_unintended_diff_only": 0.044450008869171144, |
| "tpp_threshold_10_total_metric": 0.19675001055002211, |
| "tpp_threshold_10_intended_diff_only": 0.27550001740455626, |
| "tpp_threshold_10_unintended_diff_only": 0.07875000685453415, |
| "tpp_threshold_20_total_metric": 0.20732501298189163, |
| "tpp_threshold_20_intended_diff_only": 0.32310002446174624, |
| "tpp_threshold_20_unintended_diff_only": 0.11577501147985458, |
| "tpp_threshold_50_total_metric": 0.23555000871419907, |
| "tpp_threshold_50_intended_diff_only": 0.39150002002716067, |
| "tpp_threshold_50_unintended_diff_only": 0.15595001131296157, |
| "tpp_threshold_100_total_metric": 0.24632502049207688, |
| "tpp_threshold_100_intended_diff_only": 0.4384000301361084, |
| "tpp_threshold_100_unintended_diff_only": 0.19207500964403154, |
| "tpp_threshold_500_total_metric": 0.2154250293970108, |
| "tpp_threshold_500_intended_diff_only": 0.4488000452518463, |
| "tpp_threshold_500_unintended_diff_only": 0.23337501585483553 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.1411000072956085, |
| "tpp_threshold_2_intended_diff_only": 0.16780000925064087, |
| "tpp_threshold_2_unintended_diff_only": 0.02670000195503235, |
| "tpp_threshold_5_total_metric": 0.23705001473426818, |
| "tpp_threshold_5_intended_diff_only": 0.30300002098083495, |
| "tpp_threshold_5_unintended_diff_only": 0.06595000624656677, |
| "tpp_threshold_10_total_metric": 0.27450003027915953, |
| "tpp_threshold_10_intended_diff_only": 0.4026000380516052, |
| "tpp_threshold_10_unintended_diff_only": 0.12810000777244568, |
| "tpp_threshold_20_total_metric": 0.24740002453327178, |
| "tpp_threshold_20_intended_diff_only": 0.4462000370025635, |
| "tpp_threshold_20_unintended_diff_only": 0.1988000124692917, |
| "tpp_threshold_50_total_metric": 0.19705000817775725, |
| "tpp_threshold_50_intended_diff_only": 0.4640000224113464, |
| "tpp_threshold_50_unintended_diff_only": 0.26695001423358916, |
| "tpp_threshold_100_total_metric": 0.1525000125169754, |
| "tpp_threshold_100_intended_diff_only": 0.46540002822875975, |
| "tpp_threshold_100_unintended_diff_only": 0.3129000157117844, |
| "tpp_threshold_500_total_metric": 0.08580003082752227, |
| "tpp_threshold_500_intended_diff_only": 0.4666000485420227, |
| "tpp_threshold_500_unintended_diff_only": 0.38080001771450045 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.017450007796287536, |
| "tpp_threshold_2_intended_diff_only": 0.03200001716613769, |
| "tpp_threshold_2_unintended_diff_only": 0.014550009369850158, |
| "tpp_threshold_5_total_metric": 0.046849995851516724, |
| "tpp_threshold_5_intended_diff_only": 0.06980000734329224, |
| "tpp_threshold_5_unintended_diff_only": 0.022950011491775512, |
| "tpp_threshold_10_total_metric": 0.11899999082088471, |
| "tpp_threshold_10_intended_diff_only": 0.1483999967575073, |
| "tpp_threshold_10_unintended_diff_only": 0.02940000593662262, |
| "tpp_threshold_20_total_metric": 0.16725000143051147, |
| "tpp_threshold_20_intended_diff_only": 0.20000001192092895, |
| "tpp_threshold_20_unintended_diff_only": 0.03275001049041748, |
| "tpp_threshold_50_total_metric": 0.2740500092506409, |
| "tpp_threshold_50_intended_diff_only": 0.31900001764297486, |
| "tpp_threshold_50_unintended_diff_only": 0.044950008392333984, |
| "tpp_threshold_100_total_metric": 0.34015002846717834, |
| "tpp_threshold_100_intended_diff_only": 0.41140003204345704, |
| "tpp_threshold_100_unintended_diff_only": 0.07125000357627868, |
| "tpp_threshold_500_total_metric": 0.3450500279664993, |
| "tpp_threshold_500_intended_diff_only": 0.4310000419616699, |
| "tpp_threshold_500_unintended_diff_only": 0.08595001399517059 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.23.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.23.hook_resid_post", |
| "hook_layer": 23, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.1822500377893448, |
| "tpp_threshold_2_intended_diff_only": 0.20600003004074097, |
| "tpp_threshold_2_unintended_diff_only": 0.02374999225139618, |
| "tpp_threshold_5_total_metric": 0.19825004041194916, |
| "tpp_threshold_5_intended_diff_only": 0.27000004053115845, |
| "tpp_threshold_5_unintended_diff_only": 0.07175000011920929, |
| "tpp_threshold_10_total_metric": 0.24475006759166718, |
| "tpp_threshold_10_intended_diff_only": 0.4020000696182251, |
| "tpp_threshold_10_unintended_diff_only": 0.15725000202655792, |
| "tpp_threshold_20_total_metric": 0.2225000262260437, |
| "tpp_threshold_20_intended_diff_only": 0.4360000491142273, |
| "tpp_threshold_20_unintended_diff_only": 0.2135000228881836, |
| "tpp_threshold_50_total_metric": 0.16175003349781036, |
| "tpp_threshold_50_intended_diff_only": 0.4500000476837158, |
| "tpp_threshold_50_unintended_diff_only": 0.28825001418590546, |
| "tpp_threshold_100_total_metric": 0.11375004053115845, |
| "tpp_threshold_100_intended_diff_only": 0.4530000686645508, |
| "tpp_threshold_100_unintended_diff_only": 0.33925002813339233, |
| "tpp_threshold_500_total_metric": 0.059250056743621826, |
| "tpp_threshold_500_intended_diff_only": 0.4530000686645508, |
| "tpp_threshold_500_unintended_diff_only": 0.39375001192092896 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.1679999828338623, |
| "tpp_threshold_2_intended_diff_only": 0.19900000095367432, |
| "tpp_threshold_2_unintended_diff_only": 0.03100001811981201, |
| "tpp_threshold_5_total_metric": 0.218999981880188, |
| "tpp_threshold_5_intended_diff_only": 0.2919999957084656, |
| "tpp_threshold_5_unintended_diff_only": 0.07300001382827759, |
| "tpp_threshold_10_total_metric": 0.2615000009536743, |
| "tpp_threshold_10_intended_diff_only": 0.3790000081062317, |
| "tpp_threshold_10_unintended_diff_only": 0.11750000715255737, |
| "tpp_threshold_20_total_metric": 0.22625000774860382, |
| "tpp_threshold_20_intended_diff_only": 0.43800002336502075, |
| "tpp_threshold_20_unintended_diff_only": 0.21175001561641693, |
| "tpp_threshold_50_total_metric": 0.16074998676776886, |
| "tpp_threshold_50_intended_diff_only": 0.4580000042915344, |
| "tpp_threshold_50_unintended_diff_only": 0.29725001752376556, |
| "tpp_threshold_100_total_metric": 0.11749996244907379, |
| "tpp_threshold_100_intended_diff_only": 0.45899999141693115, |
| "tpp_threshold_100_unintended_diff_only": 0.34150002896785736, |
| "tpp_threshold_500_total_metric": 0.06824998557567596, |
| "tpp_threshold_500_intended_diff_only": 0.4610000252723694, |
| "tpp_threshold_500_unintended_diff_only": 0.3927500396966934 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.17925003170967102, |
| "tpp_threshold_2_intended_diff_only": 0.2160000205039978, |
| "tpp_threshold_2_unintended_diff_only": 0.03674998879432678, |
| "tpp_threshold_5_total_metric": 0.27225005626678467, |
| "tpp_threshold_5_intended_diff_only": 0.3240000605583191, |
| "tpp_threshold_5_unintended_diff_only": 0.051750004291534424, |
| "tpp_threshold_10_total_metric": 0.2792500704526901, |
| "tpp_threshold_10_intended_diff_only": 0.3840000629425049, |
| "tpp_threshold_10_unintended_diff_only": 0.10474999248981476, |
| "tpp_threshold_20_total_metric": 0.24075005948543549, |
| "tpp_threshold_20_intended_diff_only": 0.42100006341934204, |
| "tpp_threshold_20_unintended_diff_only": 0.18025000393390656, |
| "tpp_threshold_50_total_metric": 0.18400005996227264, |
| "tpp_threshold_50_intended_diff_only": 0.44800007343292236, |
| "tpp_threshold_50_unintended_diff_only": 0.2640000134706497, |
| "tpp_threshold_100_total_metric": 0.14500005543231964, |
| "tpp_threshold_100_intended_diff_only": 0.4500000476837158, |
| "tpp_threshold_100_unintended_diff_only": 0.3049999922513962, |
| "tpp_threshold_500_total_metric": 0.05425006151199341, |
| "tpp_threshold_500_intended_diff_only": 0.4530000686645508, |
| "tpp_threshold_500_unintended_diff_only": 0.3987500071525574 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.06999997794628143, |
| "tpp_threshold_2_intended_diff_only": 0.08899998664855957, |
| "tpp_threshold_2_unintended_diff_only": 0.019000008702278137, |
| "tpp_threshold_5_total_metric": 0.2590000331401825, |
| "tpp_threshold_5_intended_diff_only": 0.30800002813339233, |
| "tpp_threshold_5_unintended_diff_only": 0.04899999499320984, |
| "tpp_threshold_10_total_metric": 0.33525002002716064, |
| "tpp_threshold_10_intended_diff_only": 0.44700002670288086, |
| "tpp_threshold_10_unintended_diff_only": 0.11175000667572021, |
| "tpp_threshold_20_total_metric": 0.29350003600120544, |
| "tpp_threshold_20_intended_diff_only": 0.4880000352859497, |
| "tpp_threshold_20_unintended_diff_only": 0.19449999928474426, |
| "tpp_threshold_50_total_metric": 0.2199999839067459, |
| "tpp_threshold_50_intended_diff_only": 0.4909999966621399, |
| "tpp_threshold_50_unintended_diff_only": 0.271000012755394, |
| "tpp_threshold_100_total_metric": 0.20524998009204865, |
| "tpp_threshold_100_intended_diff_only": 0.4909999966621399, |
| "tpp_threshold_100_unintended_diff_only": 0.28575001657009125, |
| "tpp_threshold_500_total_metric": 0.14900003373622894, |
| "tpp_threshold_500_intended_diff_only": 0.4920000433921814, |
| "tpp_threshold_500_unintended_diff_only": 0.34300000965595245 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.10600000619888306, |
| "tpp_threshold_2_intended_diff_only": 0.1290000081062317, |
| "tpp_threshold_2_unintended_diff_only": 0.023000001907348633, |
| "tpp_threshold_5_total_metric": 0.23674996197223663, |
| "tpp_threshold_5_intended_diff_only": 0.32099997997283936, |
| "tpp_threshold_5_unintended_diff_only": 0.08425001800060272, |
| "tpp_threshold_10_total_metric": 0.25174999237060547, |
| "tpp_threshold_10_intended_diff_only": 0.4010000228881836, |
| "tpp_threshold_10_unintended_diff_only": 0.14925003051757812, |
| "tpp_threshold_20_total_metric": 0.2539999932050705, |
| "tpp_threshold_20_intended_diff_only": 0.4480000138282776, |
| "tpp_threshold_20_unintended_diff_only": 0.1940000206232071, |
| "tpp_threshold_50_total_metric": 0.25874997675418854, |
| "tpp_threshold_50_intended_diff_only": 0.4729999899864197, |
| "tpp_threshold_50_unintended_diff_only": 0.21425001323223114, |
| "tpp_threshold_100_total_metric": 0.1810000240802765, |
| "tpp_threshold_100_intended_diff_only": 0.4740000367164612, |
| "tpp_threshold_100_unintended_diff_only": 0.2930000126361847, |
| "tpp_threshold_500_total_metric": 0.09825001657009125, |
| "tpp_threshold_500_intended_diff_only": 0.4740000367164612, |
| "tpp_threshold_500_unintended_diff_only": 0.37575002014636993 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": -0.002500012516975403, |
| "tpp_threshold_2_intended_diff_only": 0.013999998569488525, |
| "tpp_threshold_2_unintended_diff_only": 0.016500011086463928, |
| "tpp_threshold_5_total_metric": 0.022249966859817505, |
| "tpp_threshold_5_intended_diff_only": 0.03799998760223389, |
| "tpp_threshold_5_unintended_diff_only": 0.015750020742416382, |
| "tpp_threshold_10_total_metric": 0.05124998092651367, |
| "tpp_threshold_10_intended_diff_only": 0.06499999761581421, |
| "tpp_threshold_10_unintended_diff_only": 0.013750016689300537, |
| "tpp_threshold_20_total_metric": 0.09974999725818634, |
| "tpp_threshold_20_intended_diff_only": 0.12099999189376831, |
| "tpp_threshold_20_unintended_diff_only": 0.02124999463558197, |
| "tpp_threshold_50_total_metric": 0.2747499793767929, |
| "tpp_threshold_50_intended_diff_only": 0.296999990940094, |
| "tpp_threshold_50_unintended_diff_only": 0.022250011563301086, |
| "tpp_threshold_100_total_metric": 0.3762500286102295, |
| "tpp_threshold_100_intended_diff_only": 0.4140000343322754, |
| "tpp_threshold_100_unintended_diff_only": 0.0377500057220459, |
| "tpp_threshold_500_total_metric": 0.41075001657009125, |
| "tpp_threshold_500_intended_diff_only": 0.4570000171661377, |
| "tpp_threshold_500_unintended_diff_only": 0.04625000059604645 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.02199995517730713, |
| "tpp_threshold_2_intended_diff_only": 0.0339999794960022, |
| "tpp_threshold_2_unintended_diff_only": 0.012000024318695068, |
| "tpp_threshold_5_total_metric": 0.05449998378753662, |
| "tpp_threshold_5_intended_diff_only": 0.10600000619888306, |
| "tpp_threshold_5_unintended_diff_only": 0.051500022411346436, |
| "tpp_threshold_10_total_metric": 0.20774997770786285, |
| "tpp_threshold_10_intended_diff_only": 0.2709999680519104, |
| "tpp_threshold_10_unintended_diff_only": 0.06324999034404755, |
| "tpp_threshold_20_total_metric": 0.28999997675418854, |
| "tpp_threshold_20_intended_diff_only": 0.35199999809265137, |
| "tpp_threshold_20_unintended_diff_only": 0.06200002133846283, |
| "tpp_threshold_50_total_metric": 0.33024999499320984, |
| "tpp_threshold_50_intended_diff_only": 0.41100001335144043, |
| "tpp_threshold_50_unintended_diff_only": 0.08075001835823059, |
| "tpp_threshold_100_total_metric": 0.34550000727176666, |
| "tpp_threshold_100_intended_diff_only": 0.4390000104904175, |
| "tpp_threshold_100_unintended_diff_only": 0.09350000321865082, |
| "tpp_threshold_500_total_metric": 0.35224999487400055, |
| "tpp_threshold_500_intended_diff_only": 0.43800002336502075, |
| "tpp_threshold_500_unintended_diff_only": 0.0857500284910202 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": -0.011499971151351929, |
| "tpp_threshold_2_intended_diff_only": -0.001999974250793457, |
| "tpp_threshold_2_unintended_diff_only": 0.009499996900558472, |
| "tpp_threshold_5_total_metric": -0.00700002908706665, |
| "tpp_threshold_5_intended_diff_only": 0.001999974250793457, |
| "tpp_threshold_5_unintended_diff_only": 0.009000003337860107, |
| "tpp_threshold_10_total_metric": 0.023249968886375427, |
| "tpp_threshold_10_intended_diff_only": 0.029999971389770508, |
| "tpp_threshold_10_unintended_diff_only": 0.006750002503395081, |
| "tpp_threshold_20_total_metric": 0.029749959707260132, |
| "tpp_threshold_20_intended_diff_only": 0.042999982833862305, |
| "tpp_threshold_20_unintended_diff_only": 0.013250023126602173, |
| "tpp_threshold_50_total_metric": 0.15425001084804535, |
| "tpp_threshold_50_intended_diff_only": 0.18900001049041748, |
| "tpp_threshold_50_unintended_diff_only": 0.03474999964237213, |
| "tpp_threshold_100_total_metric": 0.2835000306367874, |
| "tpp_threshold_100_intended_diff_only": 0.4180000424385071, |
| "tpp_threshold_100_unintended_diff_only": 0.13450001180171967, |
| "tpp_threshold_500_total_metric": 0.2682500332593918, |
| "tpp_threshold_500_intended_diff_only": 0.4320000410079956, |
| "tpp_threshold_500_unintended_diff_only": 0.16375000774860382 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.03150002658367157, |
| "tpp_threshold_2_intended_diff_only": 0.03400003910064697, |
| "tpp_threshold_2_unintended_diff_only": 0.002500012516975403, |
| "tpp_threshold_5_total_metric": 0.09250001609325409, |
| "tpp_threshold_5_intended_diff_only": 0.10000002384185791, |
| "tpp_threshold_5_unintended_diff_only": 0.007500007748603821, |
| "tpp_threshold_10_total_metric": 0.18699999153614044, |
| "tpp_threshold_10_intended_diff_only": 0.21299999952316284, |
| "tpp_threshold_10_unintended_diff_only": 0.0260000079870224, |
| "tpp_threshold_20_total_metric": 0.2395000159740448, |
| "tpp_threshold_20_intended_diff_only": 0.26200002431869507, |
| "tpp_threshold_20_unintended_diff_only": 0.02250000834465027, |
| "tpp_threshold_50_total_metric": 0.3395000547170639, |
| "tpp_threshold_50_intended_diff_only": 0.36600005626678467, |
| "tpp_threshold_50_unintended_diff_only": 0.026500001549720764, |
| "tpp_threshold_100_total_metric": 0.37925004959106445, |
| "tpp_threshold_100_intended_diff_only": 0.40800005197525024, |
| "tpp_threshold_100_unintended_diff_only": 0.02875000238418579, |
| "tpp_threshold_500_total_metric": 0.379750058054924, |
| "tpp_threshold_500_intended_diff_only": 0.43500006198883057, |
| "tpp_threshold_500_unintended_diff_only": 0.055250003933906555 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.047750040888786316, |
| "tpp_threshold_2_intended_diff_only": 0.08000004291534424, |
| "tpp_threshold_2_unintended_diff_only": 0.03225000202655792, |
| "tpp_threshold_5_total_metric": 0.07200004160404205, |
| "tpp_threshold_5_intended_diff_only": 0.10300004482269287, |
| "tpp_threshold_5_unintended_diff_only": 0.031000003218650818, |
| "tpp_threshold_10_total_metric": 0.12575003504753113, |
| "tpp_threshold_10_intended_diff_only": 0.16300004720687866, |
| "tpp_threshold_10_unintended_diff_only": 0.037250012159347534, |
| "tpp_threshold_20_total_metric": 0.17725005745887756, |
| "tpp_threshold_20_intended_diff_only": 0.22200006246566772, |
| "tpp_threshold_20_unintended_diff_only": 0.04475000500679016, |
| "tpp_threshold_50_total_metric": 0.27150000631809235, |
| "tpp_threshold_50_intended_diff_only": 0.3320000171661377, |
| "tpp_threshold_50_unintended_diff_only": 0.06050001084804535, |
| "tpp_threshold_100_total_metric": 0.3162500262260437, |
| "tpp_threshold_100_intended_diff_only": 0.37800002098083496, |
| "tpp_threshold_100_unintended_diff_only": 0.06174999475479126, |
| "tpp_threshold_500_total_metric": 0.31425003707408905, |
| "tpp_threshold_500_intended_diff_only": 0.393000066280365, |
| "tpp_threshold_500_unintended_diff_only": 0.07875002920627594 |
| } |
| } |
| } |
| } |