| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "65b37170-42a3-4f63-bfdc-b4728a912b3a", |
| "datetime_epoch_millis": 1745617561806, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.06512500941753388, |
| "tpp_threshold_2_intended_diff_only": 0.09570001959800721, |
| "tpp_threshold_2_unintended_diff_only": 0.030575010180473327, |
| "tpp_threshold_5_total_metric": 0.1294250175356865, |
| "tpp_threshold_5_intended_diff_only": 0.20870002508163452, |
| "tpp_threshold_5_unintended_diff_only": 0.07927500754594802, |
| "tpp_threshold_10_total_metric": 0.14547500908374786, |
| "tpp_threshold_10_intended_diff_only": 0.27040002942085267, |
| "tpp_threshold_10_unintended_diff_only": 0.1249250203371048, |
| "tpp_threshold_20_total_metric": 0.18765000402927398, |
| "tpp_threshold_20_intended_diff_only": 0.3374000251293182, |
| "tpp_threshold_20_unintended_diff_only": 0.14975002110004423, |
| "tpp_threshold_50_total_metric": 0.2245750144124031, |
| "tpp_threshold_50_intended_diff_only": 0.4153000354766846, |
| "tpp_threshold_50_unintended_diff_only": 0.19072502106428146, |
| "tpp_threshold_100_total_metric": 0.2244500368833542, |
| "tpp_threshold_100_intended_diff_only": 0.4357000529766083, |
| "tpp_threshold_100_unintended_diff_only": 0.21125001609325408, |
| "tpp_threshold_500_total_metric": 0.17055003345012665, |
| "tpp_threshold_500_intended_diff_only": 0.4365000545978546, |
| "tpp_threshold_500_unintended_diff_only": 0.265950021147728 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.10410001873970032, |
| "tpp_threshold_2_intended_diff_only": 0.15960003137588502, |
| "tpp_threshold_2_unintended_diff_only": 0.05550001263618469, |
| "tpp_threshold_5_total_metric": 0.18805002868175508, |
| "tpp_threshold_5_intended_diff_only": 0.3404000401496887, |
| "tpp_threshold_5_unintended_diff_only": 0.15235001146793364, |
| "tpp_threshold_10_total_metric": 0.16770002245903015, |
| "tpp_threshold_10_intended_diff_only": 0.4010000467300415, |
| "tpp_threshold_10_unintended_diff_only": 0.23330002427101135, |
| "tpp_threshold_20_total_metric": 0.1609499990940094, |
| "tpp_threshold_20_intended_diff_only": 0.4384000301361084, |
| "tpp_threshold_20_unintended_diff_only": 0.277450031042099, |
| "tpp_threshold_50_total_metric": 0.11765000522136689, |
| "tpp_threshold_50_intended_diff_only": 0.4618000388145447, |
| "tpp_threshold_50_unintended_diff_only": 0.3441500335931778, |
| "tpp_threshold_100_total_metric": 0.09545003175735474, |
| "tpp_threshold_100_intended_diff_only": 0.46680005788803103, |
| "tpp_threshold_100_unintended_diff_only": 0.37135002613067625, |
| "tpp_threshold_500_total_metric": 0.0595500260591507, |
| "tpp_threshold_500_intended_diff_only": 0.46680005788803103, |
| "tpp_threshold_500_unintended_diff_only": 0.4072500318288803 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.026150000095367432, |
| "tpp_threshold_2_intended_diff_only": 0.03180000782012939, |
| "tpp_threshold_2_unintended_diff_only": 0.0056500077247619625, |
| "tpp_threshold_5_total_metric": 0.07080000638961792, |
| "tpp_threshold_5_intended_diff_only": 0.07700001001358033, |
| "tpp_threshold_5_unintended_diff_only": 0.0062000036239624025, |
| "tpp_threshold_10_total_metric": 0.12324999570846558, |
| "tpp_threshold_10_intended_diff_only": 0.13980001211166382, |
| "tpp_threshold_10_unintended_diff_only": 0.016550016403198243, |
| "tpp_threshold_20_total_metric": 0.21435000896453857, |
| "tpp_threshold_20_intended_diff_only": 0.2364000201225281, |
| "tpp_threshold_20_unintended_diff_only": 0.0220500111579895, |
| "tpp_threshold_50_total_metric": 0.33150002360343933, |
| "tpp_threshold_50_intended_diff_only": 0.36880003213882445, |
| "tpp_threshold_50_unintended_diff_only": 0.037300008535385135, |
| "tpp_threshold_100_total_metric": 0.35345004200935365, |
| "tpp_threshold_100_intended_diff_only": 0.40460004806518557, |
| "tpp_threshold_100_unintended_diff_only": 0.051150006055831906, |
| "tpp_threshold_500_total_metric": 0.2815500408411026, |
| "tpp_threshold_500_intended_diff_only": 0.4062000513076782, |
| "tpp_threshold_500_unintended_diff_only": 0.12465001046657562 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.6.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-snap-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.6.hook_resid_post", |
| "hook_layer": 6, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.19700004160404205, |
| "tpp_threshold_2_intended_diff_only": 0.3060000538825989, |
| "tpp_threshold_2_unintended_diff_only": 0.10900001227855682, |
| "tpp_threshold_5_total_metric": 0.16300004720687866, |
| "tpp_threshold_5_intended_diff_only": 0.3710000514984131, |
| "tpp_threshold_5_unintended_diff_only": 0.20800000429153442, |
| "tpp_threshold_10_total_metric": 0.10675002634525299, |
| "tpp_threshold_10_intended_diff_only": 0.4030000567436218, |
| "tpp_threshold_10_unintended_diff_only": 0.29625003039836884, |
| "tpp_threshold_20_total_metric": 0.08749999105930328, |
| "tpp_threshold_20_intended_diff_only": 0.4240000247955322, |
| "tpp_threshold_20_unintended_diff_only": 0.33650003373622894, |
| "tpp_threshold_50_total_metric": 0.06925000250339508, |
| "tpp_threshold_50_intended_diff_only": 0.437000036239624, |
| "tpp_threshold_50_unintended_diff_only": 0.36775003373622894, |
| "tpp_threshold_100_total_metric": 0.061000049114227295, |
| "tpp_threshold_100_intended_diff_only": 0.4440000653266907, |
| "tpp_threshold_100_unintended_diff_only": 0.3830000162124634, |
| "tpp_threshold_500_total_metric": 0.03275004029273987, |
| "tpp_threshold_500_intended_diff_only": 0.4440000653266907, |
| "tpp_threshold_500_unintended_diff_only": 0.4112500250339508 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.014500036835670471, |
| "tpp_threshold_2_intended_diff_only": 0.025000035762786865, |
| "tpp_threshold_2_unintended_diff_only": 0.010499998927116394, |
| "tpp_threshold_5_total_metric": 0.2107500582933426, |
| "tpp_threshold_5_intended_diff_only": 0.3280000686645508, |
| "tpp_threshold_5_unintended_diff_only": 0.11725001037120819, |
| "tpp_threshold_10_total_metric": 0.16450002789497375, |
| "tpp_threshold_10_intended_diff_only": 0.3750000596046448, |
| "tpp_threshold_10_unintended_diff_only": 0.21050003170967102, |
| "tpp_threshold_20_total_metric": 0.1405000388622284, |
| "tpp_threshold_20_intended_diff_only": 0.41200006008148193, |
| "tpp_threshold_20_unintended_diff_only": 0.27150002121925354, |
| "tpp_threshold_50_total_metric": 0.11950002610683441, |
| "tpp_threshold_50_intended_diff_only": 0.4580000638961792, |
| "tpp_threshold_50_unintended_diff_only": 0.3385000377893448, |
| "tpp_threshold_100_total_metric": 0.08275003731250763, |
| "tpp_threshold_100_intended_diff_only": 0.4620000720024109, |
| "tpp_threshold_100_unintended_diff_only": 0.37925003468990326, |
| "tpp_threshold_500_total_metric": 0.04475004971027374, |
| "tpp_threshold_500_intended_diff_only": 0.4620000720024109, |
| "tpp_threshold_500_unintended_diff_only": 0.41725002229213715 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.1340000182390213, |
| "tpp_threshold_2_intended_diff_only": 0.2160000205039978, |
| "tpp_threshold_2_unintended_diff_only": 0.0820000022649765, |
| "tpp_threshold_5_total_metric": 0.13750004768371582, |
| "tpp_threshold_5_intended_diff_only": 0.35200005769729614, |
| "tpp_threshold_5_unintended_diff_only": 0.21450001001358032, |
| "tpp_threshold_10_total_metric": 0.1285000443458557, |
| "tpp_threshold_10_intended_diff_only": 0.36600005626678467, |
| "tpp_threshold_10_unintended_diff_only": 0.23750001192092896, |
| "tpp_threshold_20_total_metric": 0.1470000296831131, |
| "tpp_threshold_20_intended_diff_only": 0.4030000567436218, |
| "tpp_threshold_20_unintended_diff_only": 0.25600002706050873, |
| "tpp_threshold_50_total_metric": 0.12674999237060547, |
| "tpp_threshold_50_intended_diff_only": 0.4480000138282776, |
| "tpp_threshold_50_unintended_diff_only": 0.3212500214576721, |
| "tpp_threshold_100_total_metric": 0.09800004959106445, |
| "tpp_threshold_100_intended_diff_only": 0.4580000638961792, |
| "tpp_threshold_100_unintended_diff_only": 0.36000001430511475, |
| "tpp_threshold_500_total_metric": 0.04525001347064972, |
| "tpp_threshold_500_intended_diff_only": 0.4580000638961792, |
| "tpp_threshold_500_unintended_diff_only": 0.4127500504255295 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.05625002086162567, |
| "tpp_threshold_2_intended_diff_only": 0.08900004625320435, |
| "tpp_threshold_2_unintended_diff_only": 0.032750025391578674, |
| "tpp_threshold_5_total_metric": 0.2447500228881836, |
| "tpp_threshold_5_intended_diff_only": 0.3960000276565552, |
| "tpp_threshold_5_unintended_diff_only": 0.15125000476837158, |
| "tpp_threshold_10_total_metric": 0.22725003957748413, |
| "tpp_threshold_10_intended_diff_only": 0.46400004625320435, |
| "tpp_threshold_10_unintended_diff_only": 0.23675000667572021, |
| "tpp_threshold_20_total_metric": 0.18124999105930328, |
| "tpp_threshold_20_intended_diff_only": 0.48000001907348633, |
| "tpp_threshold_20_unintended_diff_only": 0.29875002801418304, |
| "tpp_threshold_50_total_metric": 0.1325000375509262, |
| "tpp_threshold_50_intended_diff_only": 0.48600006103515625, |
| "tpp_threshold_50_unintended_diff_only": 0.35350002348423004, |
| "tpp_threshold_100_total_metric": 0.12625004351139069, |
| "tpp_threshold_100_intended_diff_only": 0.49000006914138794, |
| "tpp_threshold_100_unintended_diff_only": 0.36375002562999725, |
| "tpp_threshold_500_total_metric": 0.09400004148483276, |
| "tpp_threshold_500_intended_diff_only": 0.49000006914138794, |
| "tpp_threshold_500_unintended_diff_only": 0.3960000276565552 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.11874997615814209, |
| "tpp_threshold_2_intended_diff_only": 0.16200000047683716, |
| "tpp_threshold_2_unintended_diff_only": 0.04325002431869507, |
| "tpp_threshold_5_total_metric": 0.18424996733665466, |
| "tpp_threshold_5_intended_diff_only": 0.2549999952316284, |
| "tpp_threshold_5_unintended_diff_only": 0.07075002789497375, |
| "tpp_threshold_10_total_metric": 0.21149997413158417, |
| "tpp_threshold_10_intended_diff_only": 0.3970000147819519, |
| "tpp_threshold_10_unintended_diff_only": 0.18550004065036774, |
| "tpp_threshold_20_total_metric": 0.24849994480609894, |
| "tpp_threshold_20_intended_diff_only": 0.4729999899864197, |
| "tpp_threshold_20_unintended_diff_only": 0.22450004518032074, |
| "tpp_threshold_50_total_metric": 0.14024996757507324, |
| "tpp_threshold_50_intended_diff_only": 0.48000001907348633, |
| "tpp_threshold_50_unintended_diff_only": 0.3397500514984131, |
| "tpp_threshold_100_total_metric": 0.10924997925758362, |
| "tpp_threshold_100_intended_diff_only": 0.48000001907348633, |
| "tpp_threshold_100_unintended_diff_only": 0.3707500398159027, |
| "tpp_threshold_500_total_metric": 0.08099998533725739, |
| "tpp_threshold_500_intended_diff_only": 0.48000001907348633, |
| "tpp_threshold_500_unintended_diff_only": 0.39900003373622894 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.010499998927116394, |
| "tpp_threshold_2_intended_diff_only": 0.013999998569488525, |
| "tpp_threshold_2_unintended_diff_only": 0.0034999996423721313, |
| "tpp_threshold_5_total_metric": 0.019000038504600525, |
| "tpp_threshold_5_intended_diff_only": 0.021000027656555176, |
| "tpp_threshold_5_unintended_diff_only": 0.001999989151954651, |
| "tpp_threshold_10_total_metric": 0.09200003743171692, |
| "tpp_threshold_10_intended_diff_only": 0.1260000467300415, |
| "tpp_threshold_10_unintended_diff_only": 0.034000009298324585, |
| "tpp_threshold_20_total_metric": 0.16950002312660217, |
| "tpp_threshold_20_intended_diff_only": 0.2070000171661377, |
| "tpp_threshold_20_unintended_diff_only": 0.03749999403953552, |
| "tpp_threshold_50_total_metric": 0.3152500241994858, |
| "tpp_threshold_50_intended_diff_only": 0.3670000433921814, |
| "tpp_threshold_50_unintended_diff_only": 0.05175001919269562, |
| "tpp_threshold_100_total_metric": 0.36350004374980927, |
| "tpp_threshold_100_intended_diff_only": 0.4360000491142273, |
| "tpp_threshold_100_unintended_diff_only": 0.07250000536441803, |
| "tpp_threshold_500_total_metric": 0.2782500684261322, |
| "tpp_threshold_500_intended_diff_only": 0.43900007009506226, |
| "tpp_threshold_500_unintended_diff_only": 0.16075000166893005 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.07524998486042023, |
| "tpp_threshold_2_intended_diff_only": 0.08300000429153442, |
| "tpp_threshold_2_unintended_diff_only": 0.007750019431114197, |
| "tpp_threshold_5_total_metric": 0.12399996817111969, |
| "tpp_threshold_5_intended_diff_only": 0.13499999046325684, |
| "tpp_threshold_5_unintended_diff_only": 0.011000022292137146, |
| "tpp_threshold_10_total_metric": 0.14949998259544373, |
| "tpp_threshold_10_intended_diff_only": 0.16699999570846558, |
| "tpp_threshold_10_unintended_diff_only": 0.01750001311302185, |
| "tpp_threshold_20_total_metric": 0.27650003135204315, |
| "tpp_threshold_20_intended_diff_only": 0.3020000457763672, |
| "tpp_threshold_20_unintended_diff_only": 0.025500014424324036, |
| "tpp_threshold_50_total_metric": 0.3695000112056732, |
| "tpp_threshold_50_intended_diff_only": 0.42000001668930054, |
| "tpp_threshold_50_unintended_diff_only": 0.05050000548362732, |
| "tpp_threshold_100_total_metric": 0.356500044465065, |
| "tpp_threshold_100_intended_diff_only": 0.4270000457763672, |
| "tpp_threshold_100_unintended_diff_only": 0.07050000131130219, |
| "tpp_threshold_500_total_metric": 0.29350002110004425, |
| "tpp_threshold_500_intended_diff_only": 0.4270000457763672, |
| "tpp_threshold_500_unintended_diff_only": 0.13350002467632294 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": 0.012749969959259033, |
| "tpp_threshold_2_intended_diff_only": 0.010999977588653564, |
| "tpp_threshold_2_unintended_diff_only": -0.0017499923706054688, |
| "tpp_threshold_5_total_metric": 0.051750004291534424, |
| "tpp_threshold_5_intended_diff_only": 0.06000000238418579, |
| "tpp_threshold_5_unintended_diff_only": 0.008249998092651367, |
| "tpp_threshold_10_total_metric": 0.08049997687339783, |
| "tpp_threshold_10_intended_diff_only": 0.08799999952316284, |
| "tpp_threshold_10_unintended_diff_only": 0.007500022649765015, |
| "tpp_threshold_20_total_metric": 0.12924998998641968, |
| "tpp_threshold_20_intended_diff_only": 0.14300000667572021, |
| "tpp_threshold_20_unintended_diff_only": 0.013750016689300537, |
| "tpp_threshold_50_total_metric": 0.2865000218153, |
| "tpp_threshold_50_intended_diff_only": 0.30800002813339233, |
| "tpp_threshold_50_unintended_diff_only": 0.021500006318092346, |
| "tpp_threshold_100_total_metric": 0.3582500219345093, |
| "tpp_threshold_100_intended_diff_only": 0.40000003576278687, |
| "tpp_threshold_100_unintended_diff_only": 0.04175001382827759, |
| "tpp_threshold_500_total_metric": 0.27025000751018524, |
| "tpp_threshold_500_intended_diff_only": 0.4050000309944153, |
| "tpp_threshold_500_unintended_diff_only": 0.13475002348423004 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.000250011682510376, |
| "tpp_threshold_2_intended_diff_only": 0.012000024318695068, |
| "tpp_threshold_2_unintended_diff_only": 0.011750012636184692, |
| "tpp_threshold_5_total_metric": 0.053749993443489075, |
| "tpp_threshold_5_intended_diff_only": 0.06099998950958252, |
| "tpp_threshold_5_unintended_diff_only": 0.007249996066093445, |
| "tpp_threshold_10_total_metric": 0.16099998354911804, |
| "tpp_threshold_10_intended_diff_only": 0.17100000381469727, |
| "tpp_threshold_10_unintended_diff_only": 0.010000020265579224, |
| "tpp_threshold_20_total_metric": 0.25425001978874207, |
| "tpp_threshold_20_intended_diff_only": 0.2710000276565552, |
| "tpp_threshold_20_unintended_diff_only": 0.01675000786781311, |
| "tpp_threshold_50_total_metric": 0.37575002014636993, |
| "tpp_threshold_50_intended_diff_only": 0.406000018119812, |
| "tpp_threshold_50_unintended_diff_only": 0.030249997973442078, |
| "tpp_threshold_100_total_metric": 0.37525004148483276, |
| "tpp_threshold_100_intended_diff_only": 0.40800005197525024, |
| "tpp_threshold_100_unintended_diff_only": 0.03275001049041748, |
| "tpp_threshold_500_total_metric": 0.3082500547170639, |
| "tpp_threshold_500_intended_diff_only": 0.40800005197525024, |
| "tpp_threshold_500_unintended_diff_only": 0.09974999725818634 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.03200003504753113, |
| "tpp_threshold_2_intended_diff_only": 0.03900003433227539, |
| "tpp_threshold_2_unintended_diff_only": 0.006999999284744263, |
| "tpp_threshold_5_total_metric": 0.10550002753734589, |
| "tpp_threshold_5_intended_diff_only": 0.10800004005432129, |
| "tpp_threshold_5_unintended_diff_only": 0.002500012516975403, |
| "tpp_threshold_10_total_metric": 0.13324999809265137, |
| "tpp_threshold_10_intended_diff_only": 0.1470000147819519, |
| "tpp_threshold_10_unintended_diff_only": 0.013750016689300537, |
| "tpp_threshold_20_total_metric": 0.2422499805688858, |
| "tpp_threshold_20_intended_diff_only": 0.2590000033378601, |
| "tpp_threshold_20_unintended_diff_only": 0.016750022768974304, |
| "tpp_threshold_50_total_metric": 0.31050004065036774, |
| "tpp_threshold_50_intended_diff_only": 0.34300005435943604, |
| "tpp_threshold_50_unintended_diff_only": 0.0325000137090683, |
| "tpp_threshold_100_total_metric": 0.3137500584125519, |
| "tpp_threshold_100_intended_diff_only": 0.35200005769729614, |
| "tpp_threshold_100_unintended_diff_only": 0.03824999928474426, |
| "tpp_threshold_500_total_metric": 0.2575000524520874, |
| "tpp_threshold_500_intended_diff_only": 0.35200005769729614, |
| "tpp_threshold_500_unintended_diff_only": 0.09450000524520874 |
| } |
| } |
| } |
| } |