| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "65b37170-42a3-4f63-bfdc-b4728a912b3a", |
| "datetime_epoch_millis": 1745618741934, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.07237500548362731, |
| "tpp_threshold_2_intended_diff_only": 0.11480001211166382, |
| "tpp_threshold_2_unintended_diff_only": 0.0424250066280365, |
| "tpp_threshold_5_total_metric": 0.09940000474452972, |
| "tpp_threshold_5_intended_diff_only": 0.18480001091957093, |
| "tpp_threshold_5_unintended_diff_only": 0.0854000061750412, |
| "tpp_threshold_10_total_metric": 0.1002500057220459, |
| "tpp_threshold_10_intended_diff_only": 0.22740001678466798, |
| "tpp_threshold_10_unintended_diff_only": 0.12715001106262208, |
| "tpp_threshold_20_total_metric": 0.11480001509189605, |
| "tpp_threshold_20_intended_diff_only": 0.2687000215053558, |
| "tpp_threshold_20_unintended_diff_only": 0.15390000641345977, |
| "tpp_threshold_50_total_metric": 0.16755001842975617, |
| "tpp_threshold_50_intended_diff_only": 0.3556000292301178, |
| "tpp_threshold_50_unintended_diff_only": 0.18805001080036166, |
| "tpp_threshold_100_total_metric": 0.1982250139117241, |
| "tpp_threshold_100_intended_diff_only": 0.404800021648407, |
| "tpp_threshold_100_unintended_diff_only": 0.2065750077366829, |
| "tpp_threshold_500_total_metric": 0.19385003447532656, |
| "tpp_threshold_500_intended_diff_only": 0.44220004677772523, |
| "tpp_threshold_500_unintended_diff_only": 0.2483500123023987 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.13365001380443572, |
| "tpp_threshold_2_intended_diff_only": 0.21380001306533813, |
| "tpp_threshold_2_unintended_diff_only": 0.08014999926090241, |
| "tpp_threshold_5_total_metric": 0.17090000510215758, |
| "tpp_threshold_5_intended_diff_only": 0.3318000078201294, |
| "tpp_threshold_5_unintended_diff_only": 0.1609000027179718, |
| "tpp_threshold_10_total_metric": 0.15275000631809235, |
| "tpp_threshold_10_intended_diff_only": 0.3930000185966492, |
| "tpp_threshold_10_unintended_diff_only": 0.24025001227855683, |
| "tpp_threshold_20_total_metric": 0.13430002331733704, |
| "tpp_threshold_20_intended_diff_only": 0.42360002994537355, |
| "tpp_threshold_20_unintended_diff_only": 0.2893000066280365, |
| "tpp_threshold_50_total_metric": 0.10615000426769257, |
| "tpp_threshold_50_intended_diff_only": 0.4508000135421753, |
| "tpp_threshold_50_unintended_diff_only": 0.34465000927448275, |
| "tpp_threshold_100_total_metric": 0.09410001337528229, |
| "tpp_threshold_100_intended_diff_only": 0.4622000217437744, |
| "tpp_threshold_100_unintended_diff_only": 0.3681000083684921, |
| "tpp_threshold_500_total_metric": 0.0716000258922577, |
| "tpp_threshold_500_intended_diff_only": 0.46720004081726074, |
| "tpp_threshold_500_unintended_diff_only": 0.3956000149250031 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.011099997162818908, |
| "tpp_threshold_2_intended_diff_only": 0.015800011157989503, |
| "tpp_threshold_2_unintended_diff_only": 0.004700013995170593, |
| "tpp_threshold_5_total_metric": 0.027900004386901857, |
| "tpp_threshold_5_intended_diff_only": 0.03780001401901245, |
| "tpp_threshold_5_unintended_diff_only": 0.009900009632110596, |
| "tpp_threshold_10_total_metric": 0.04775000512599945, |
| "tpp_threshold_10_intended_diff_only": 0.061800014972686765, |
| "tpp_threshold_10_unintended_diff_only": 0.014050009846687316, |
| "tpp_threshold_20_total_metric": 0.09530000686645508, |
| "tpp_threshold_20_intended_diff_only": 0.11380001306533813, |
| "tpp_threshold_20_unintended_diff_only": 0.018500006198883055, |
| "tpp_threshold_50_total_metric": 0.22895003259181976, |
| "tpp_threshold_50_intended_diff_only": 0.2604000449180603, |
| "tpp_threshold_50_unintended_diff_only": 0.03145001232624054, |
| "tpp_threshold_100_total_metric": 0.3023500144481659, |
| "tpp_threshold_100_intended_diff_only": 0.34740002155303956, |
| "tpp_threshold_100_unintended_diff_only": 0.045050007104873654, |
| "tpp_threshold_500_total_metric": 0.3161000430583954, |
| "tpp_threshold_500_intended_diff_only": 0.4172000527381897, |
| "tpp_threshold_500_unintended_diff_only": 0.10110000967979431 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.13.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-snap-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.13.hook_resid_post", |
| "hook_layer": 13, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.0794999897480011, |
| "tpp_threshold_2_intended_diff_only": 0.09799998998641968, |
| "tpp_threshold_2_unintended_diff_only": 0.01850000023841858, |
| "tpp_threshold_5_total_metric": 0.1655000001192093, |
| "tpp_threshold_5_intended_diff_only": 0.24000000953674316, |
| "tpp_threshold_5_unintended_diff_only": 0.07450000941753387, |
| "tpp_threshold_10_total_metric": 0.1770000010728836, |
| "tpp_threshold_10_intended_diff_only": 0.36000001430511475, |
| "tpp_threshold_10_unintended_diff_only": 0.18300001323223114, |
| "tpp_threshold_20_total_metric": 0.13524998724460602, |
| "tpp_threshold_20_intended_diff_only": 0.39800000190734863, |
| "tpp_threshold_20_unintended_diff_only": 0.2627500146627426, |
| "tpp_threshold_50_total_metric": 0.09124995768070221, |
| "tpp_threshold_50_intended_diff_only": 0.4309999942779541, |
| "tpp_threshold_50_unintended_diff_only": 0.3397500365972519, |
| "tpp_threshold_100_total_metric": 0.09099999070167542, |
| "tpp_threshold_100_intended_diff_only": 0.4440000057220459, |
| "tpp_threshold_100_unintended_diff_only": 0.3530000150203705, |
| "tpp_threshold_500_total_metric": 0.058999985456466675, |
| "tpp_threshold_500_intended_diff_only": 0.4520000219345093, |
| "tpp_threshold_500_unintended_diff_only": 0.3930000364780426 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.12025003135204315, |
| "tpp_threshold_2_intended_diff_only": 0.25200003385543823, |
| "tpp_threshold_2_unintended_diff_only": 0.13175000250339508, |
| "tpp_threshold_5_total_metric": 0.14000000059604645, |
| "tpp_threshold_5_intended_diff_only": 0.3050000071525574, |
| "tpp_threshold_5_unintended_diff_only": 0.16500000655651093, |
| "tpp_threshold_10_total_metric": 0.11750002205371857, |
| "tpp_threshold_10_intended_diff_only": 0.359000027179718, |
| "tpp_threshold_10_unintended_diff_only": 0.24150000512599945, |
| "tpp_threshold_20_total_metric": 0.11725001037120819, |
| "tpp_threshold_20_intended_diff_only": 0.3930000066757202, |
| "tpp_threshold_20_unintended_diff_only": 0.275749996304512, |
| "tpp_threshold_50_total_metric": 0.09225001931190491, |
| "tpp_threshold_50_intended_diff_only": 0.43800002336502075, |
| "tpp_threshold_50_unintended_diff_only": 0.34575000405311584, |
| "tpp_threshold_100_total_metric": 0.07199998199939728, |
| "tpp_threshold_100_intended_diff_only": 0.45399999618530273, |
| "tpp_threshold_100_unintended_diff_only": 0.38200001418590546, |
| "tpp_threshold_500_total_metric": 0.05525001883506775, |
| "tpp_threshold_500_intended_diff_only": 0.46000003814697266, |
| "tpp_threshold_500_unintended_diff_only": 0.4047500193119049 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.13600003719329834, |
| "tpp_threshold_2_intended_diff_only": 0.2890000343322754, |
| "tpp_threshold_2_unintended_diff_only": 0.15299999713897705, |
| "tpp_threshold_5_total_metric": 0.11275003850460052, |
| "tpp_threshold_5_intended_diff_only": 0.3490000367164612, |
| "tpp_threshold_5_unintended_diff_only": 0.23624999821186066, |
| "tpp_threshold_10_total_metric": 0.09225000441074371, |
| "tpp_threshold_10_intended_diff_only": 0.3790000081062317, |
| "tpp_threshold_10_unintended_diff_only": 0.286750003695488, |
| "tpp_threshold_20_total_metric": 0.09275005757808685, |
| "tpp_threshold_20_intended_diff_only": 0.4030000567436218, |
| "tpp_threshold_20_unintended_diff_only": 0.310249999165535, |
| "tpp_threshold_50_total_metric": 0.06850001215934753, |
| "tpp_threshold_50_intended_diff_only": 0.42500001192092896, |
| "tpp_threshold_50_unintended_diff_only": 0.3564999997615814, |
| "tpp_threshold_100_total_metric": 0.07225005328655243, |
| "tpp_threshold_100_intended_diff_only": 0.4490000605583191, |
| "tpp_threshold_100_unintended_diff_only": 0.37675000727176666, |
| "tpp_threshold_500_total_metric": 0.04750005900859833, |
| "tpp_threshold_500_intended_diff_only": 0.4540000557899475, |
| "tpp_threshold_500_unintended_diff_only": 0.4064999967813492 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.23274999856948853, |
| "tpp_threshold_2_intended_diff_only": 0.3100000023841858, |
| "tpp_threshold_2_unintended_diff_only": 0.07725000381469727, |
| "tpp_threshold_5_total_metric": 0.25699998438358307, |
| "tpp_threshold_5_intended_diff_only": 0.4179999828338623, |
| "tpp_threshold_5_unintended_diff_only": 0.16099999845027924, |
| "tpp_threshold_10_total_metric": 0.25849997997283936, |
| "tpp_threshold_10_intended_diff_only": 0.4580000042915344, |
| "tpp_threshold_10_unintended_diff_only": 0.19950002431869507, |
| "tpp_threshold_20_total_metric": 0.21900002658367157, |
| "tpp_threshold_20_intended_diff_only": 0.4790000319480896, |
| "tpp_threshold_20_unintended_diff_only": 0.26000000536441803, |
| "tpp_threshold_50_total_metric": 0.15975002944469452, |
| "tpp_threshold_50_intended_diff_only": 0.484000027179718, |
| "tpp_threshold_50_unintended_diff_only": 0.3242499977350235, |
| "tpp_threshold_100_total_metric": 0.13474997878074646, |
| "tpp_threshold_100_intended_diff_only": 0.4869999885559082, |
| "tpp_threshold_100_unintended_diff_only": 0.35225000977516174, |
| "tpp_threshold_500_total_metric": 0.11300000548362732, |
| "tpp_threshold_500_intended_diff_only": 0.4930000305175781, |
| "tpp_threshold_500_unintended_diff_only": 0.3800000250339508 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.09975001215934753, |
| "tpp_threshold_2_intended_diff_only": 0.12000000476837158, |
| "tpp_threshold_2_unintended_diff_only": 0.020249992609024048, |
| "tpp_threshold_5_total_metric": 0.17925000190734863, |
| "tpp_threshold_5_intended_diff_only": 0.34700000286102295, |
| "tpp_threshold_5_unintended_diff_only": 0.16775000095367432, |
| "tpp_threshold_10_total_metric": 0.11850002408027649, |
| "tpp_threshold_10_intended_diff_only": 0.409000039100647, |
| "tpp_threshold_10_unintended_diff_only": 0.2905000150203705, |
| "tpp_threshold_20_total_metric": 0.10725003480911255, |
| "tpp_threshold_20_intended_diff_only": 0.4450000524520874, |
| "tpp_threshold_20_unintended_diff_only": 0.33775001764297485, |
| "tpp_threshold_50_total_metric": 0.11900000274181366, |
| "tpp_threshold_50_intended_diff_only": 0.47600001096725464, |
| "tpp_threshold_50_unintended_diff_only": 0.357000008225441, |
| "tpp_threshold_100_total_metric": 0.10050006210803986, |
| "tpp_threshold_100_intended_diff_only": 0.47700005769729614, |
| "tpp_threshold_100_unintended_diff_only": 0.3764999955892563, |
| "tpp_threshold_500_total_metric": 0.08325006067752838, |
| "tpp_threshold_500_intended_diff_only": 0.47700005769729614, |
| "tpp_threshold_500_unintended_diff_only": 0.39374999701976776 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.011999979615211487, |
| "tpp_threshold_2_intended_diff_only": 0.018999993801116943, |
| "tpp_threshold_2_unintended_diff_only": 0.0070000141859054565, |
| "tpp_threshold_5_total_metric": 0.012000009417533875, |
| "tpp_threshold_5_intended_diff_only": 0.021000027656555176, |
| "tpp_threshold_5_unintended_diff_only": 0.009000018239021301, |
| "tpp_threshold_10_total_metric": 0.024250030517578125, |
| "tpp_threshold_10_intended_diff_only": 0.03900003433227539, |
| "tpp_threshold_10_unintended_diff_only": 0.014750003814697266, |
| "tpp_threshold_20_total_metric": 0.04225000739097595, |
| "tpp_threshold_20_intended_diff_only": 0.05900001525878906, |
| "tpp_threshold_20_unintended_diff_only": 0.01675000786781311, |
| "tpp_threshold_50_total_metric": 0.1887500286102295, |
| "tpp_threshold_50_intended_diff_only": 0.22800004482269287, |
| "tpp_threshold_50_unintended_diff_only": 0.03925001621246338, |
| "tpp_threshold_100_total_metric": 0.30625002086162567, |
| "tpp_threshold_100_intended_diff_only": 0.3490000367164612, |
| "tpp_threshold_100_unintended_diff_only": 0.04275001585483551, |
| "tpp_threshold_500_total_metric": 0.38950005173683167, |
| "tpp_threshold_500_intended_diff_only": 0.4540000557899475, |
| "tpp_threshold_500_unintended_diff_only": 0.06450000405311584 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.013250008225440979, |
| "tpp_threshold_2_intended_diff_only": 0.021000027656555176, |
| "tpp_threshold_2_unintended_diff_only": 0.007750019431114197, |
| "tpp_threshold_5_total_metric": 0.04424998164176941, |
| "tpp_threshold_5_intended_diff_only": 0.0559999942779541, |
| "tpp_threshold_5_unintended_diff_only": 0.011750012636184692, |
| "tpp_threshold_10_total_metric": 0.06674997508525848, |
| "tpp_threshold_10_intended_diff_only": 0.07899999618530273, |
| "tpp_threshold_10_unintended_diff_only": 0.01225002110004425, |
| "tpp_threshold_20_total_metric": 0.10125002264976501, |
| "tpp_threshold_20_intended_diff_only": 0.12700003385543823, |
| "tpp_threshold_20_unintended_diff_only": 0.025750011205673218, |
| "tpp_threshold_50_total_metric": 0.2680000066757202, |
| "tpp_threshold_50_intended_diff_only": 0.3070000410079956, |
| "tpp_threshold_50_unintended_diff_only": 0.03900003433227539, |
| "tpp_threshold_100_total_metric": 0.3239999860525131, |
| "tpp_threshold_100_intended_diff_only": 0.40299999713897705, |
| "tpp_threshold_100_unintended_diff_only": 0.07900001108646393, |
| "tpp_threshold_500_total_metric": 0.3100000321865082, |
| "tpp_threshold_500_intended_diff_only": 0.4270000457763672, |
| "tpp_threshold_500_unintended_diff_only": 0.11700001358985901 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": 0.005250006914138794, |
| "tpp_threshold_2_intended_diff_only": 0.0040000081062316895, |
| "tpp_threshold_2_unintended_diff_only": -0.0012499988079071045, |
| "tpp_threshold_5_total_metric": 0.003750041127204895, |
| "tpp_threshold_5_intended_diff_only": 0.01100003719329834, |
| "tpp_threshold_5_unintended_diff_only": 0.007249996066093445, |
| "tpp_threshold_10_total_metric": 0.00975005328655243, |
| "tpp_threshold_10_intended_diff_only": 0.01900005340576172, |
| "tpp_threshold_10_unintended_diff_only": 0.00925000011920929, |
| "tpp_threshold_20_total_metric": 0.05400000512599945, |
| "tpp_threshold_20_intended_diff_only": 0.0690000057220459, |
| "tpp_threshold_20_unintended_diff_only": 0.015000000596046448, |
| "tpp_threshold_50_total_metric": 0.1727500557899475, |
| "tpp_threshold_50_intended_diff_only": 0.18500006198883057, |
| "tpp_threshold_50_unintended_diff_only": 0.012250006198883057, |
| "tpp_threshold_100_total_metric": 0.2497500479221344, |
| "tpp_threshold_100_intended_diff_only": 0.27500003576278687, |
| "tpp_threshold_100_unintended_diff_only": 0.025249987840652466, |
| "tpp_threshold_500_total_metric": 0.24800007045269012, |
| "tpp_threshold_500_intended_diff_only": 0.42500007152557373, |
| "tpp_threshold_500_unintended_diff_only": 0.1770000010728836 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.007500022649765015, |
| "tpp_threshold_2_intended_diff_only": 0.01500004529953003, |
| "tpp_threshold_2_unintended_diff_only": 0.007500022649765015, |
| "tpp_threshold_5_total_metric": 0.02250000834465027, |
| "tpp_threshold_5_intended_diff_only": 0.03200000524520874, |
| "tpp_threshold_5_unintended_diff_only": 0.009499996900558472, |
| "tpp_threshold_10_total_metric": 0.03850001096725464, |
| "tpp_threshold_10_intended_diff_only": 0.04900002479553223, |
| "tpp_threshold_10_unintended_diff_only": 0.010500013828277588, |
| "tpp_threshold_20_total_metric": 0.07600003480911255, |
| "tpp_threshold_20_intended_diff_only": 0.09400004148483276, |
| "tpp_threshold_20_unintended_diff_only": 0.018000006675720215, |
| "tpp_threshold_50_total_metric": 0.205500066280365, |
| "tpp_threshold_50_intended_diff_only": 0.23100006580352783, |
| "tpp_threshold_50_unintended_diff_only": 0.025499999523162842, |
| "tpp_threshold_100_total_metric": 0.3112500458955765, |
| "tpp_threshold_100_intended_diff_only": 0.34300005435943604, |
| "tpp_threshold_100_unintended_diff_only": 0.03175000846385956, |
| "tpp_threshold_500_total_metric": 0.3512500673532486, |
| "tpp_threshold_500_intended_diff_only": 0.4110000729560852, |
| "tpp_threshold_500_unintended_diff_only": 0.05975000560283661 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.01749996840953827, |
| "tpp_threshold_2_intended_diff_only": 0.019999980926513672, |
| "tpp_threshold_2_unintended_diff_only": 0.002500012516975403, |
| "tpp_threshold_5_total_metric": 0.05699998140335083, |
| "tpp_threshold_5_intended_diff_only": 0.0690000057220459, |
| "tpp_threshold_5_unintended_diff_only": 0.012000024318695068, |
| "tpp_threshold_10_total_metric": 0.09949995577335358, |
| "tpp_threshold_10_intended_diff_only": 0.12299996614456177, |
| "tpp_threshold_10_unintended_diff_only": 0.02350001037120819, |
| "tpp_threshold_20_total_metric": 0.20299996435642242, |
| "tpp_threshold_20_intended_diff_only": 0.21999996900558472, |
| "tpp_threshold_20_unintended_diff_only": 0.017000004649162292, |
| "tpp_threshold_50_total_metric": 0.3097500056028366, |
| "tpp_threshold_50_intended_diff_only": 0.35100001096725464, |
| "tpp_threshold_50_unintended_diff_only": 0.04125000536441803, |
| "tpp_threshold_100_total_metric": 0.3204999715089798, |
| "tpp_threshold_100_intended_diff_only": 0.3669999837875366, |
| "tpp_threshold_100_unintended_diff_only": 0.046500012278556824, |
| "tpp_threshold_500_total_metric": 0.28174999356269836, |
| "tpp_threshold_500_intended_diff_only": 0.36900001764297485, |
| "tpp_threshold_500_unintended_diff_only": 0.08725002408027649 |
| } |
| } |
| } |
| } |