| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "91ffb92c-ec01-47b1-9f45-773ec12047b3", |
| "datetime_epoch_millis": 1745752372158, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.03582501113414764, |
| "tpp_threshold_2_intended_diff_only": 0.045500004291534425, |
| "tpp_threshold_2_unintended_diff_only": 0.00967499315738678, |
| "tpp_threshold_5_total_metric": 0.08102499991655349, |
| "tpp_threshold_5_intended_diff_only": 0.1046999990940094, |
| "tpp_threshold_5_unintended_diff_only": 0.0236749991774559, |
| "tpp_threshold_10_total_metric": 0.18400001376867295, |
| "tpp_threshold_10_intended_diff_only": 0.22860000729560853, |
| "tpp_threshold_10_unintended_diff_only": 0.04459999352693558, |
| "tpp_threshold_20_total_metric": 0.22440001815557478, |
| "tpp_threshold_20_intended_diff_only": 0.30660001039505, |
| "tpp_threshold_20_unintended_diff_only": 0.08219999223947524, |
| "tpp_threshold_50_total_metric": 0.25502501279115675, |
| "tpp_threshold_50_intended_diff_only": 0.3882000148296356, |
| "tpp_threshold_50_unintended_diff_only": 0.13317500203847885, |
| "tpp_threshold_100_total_metric": 0.2610000088810921, |
| "tpp_threshold_100_intended_diff_only": 0.427700012922287, |
| "tpp_threshold_100_unintended_diff_only": 0.16670000404119492, |
| "tpp_threshold_500_total_metric": 0.21417502760887147, |
| "tpp_threshold_500_intended_diff_only": 0.4473000347614288, |
| "tpp_threshold_500_unintended_diff_only": 0.23312500715255735 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.04525000751018524, |
| "tpp_threshold_2_intended_diff_only": 0.05900000333786011, |
| "tpp_threshold_2_unintended_diff_only": 0.013749995827674865, |
| "tpp_threshold_5_total_metric": 0.11535000205039977, |
| "tpp_threshold_5_intended_diff_only": 0.15140000581741334, |
| "tpp_threshold_5_unintended_diff_only": 0.03605000376701355, |
| "tpp_threshold_10_total_metric": 0.27205002307891846, |
| "tpp_threshold_10_intended_diff_only": 0.3432000160217285, |
| "tpp_threshold_10_unintended_diff_only": 0.07114999294281006, |
| "tpp_threshold_20_total_metric": 0.28160001933574674, |
| "tpp_threshold_20_intended_diff_only": 0.42180001735687256, |
| "tpp_threshold_20_unintended_diff_only": 0.1401999980211258, |
| "tpp_threshold_50_total_metric": 0.22495001256465913, |
| "tpp_threshold_50_intended_diff_only": 0.4558000206947327, |
| "tpp_threshold_50_unintended_diff_only": 0.23085000813007356, |
| "tpp_threshold_100_total_metric": 0.17375000715255737, |
| "tpp_threshold_100_intended_diff_only": 0.46620001792907717, |
| "tpp_threshold_100_unintended_diff_only": 0.2924500107765198, |
| "tpp_threshold_500_total_metric": 0.08675002753734588, |
| "tpp_threshold_500_intended_diff_only": 0.4698000431060791, |
| "tpp_threshold_500_unintended_diff_only": 0.3830500155687332 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.026400014758110046, |
| "tpp_threshold_2_intended_diff_only": 0.03200000524520874, |
| "tpp_threshold_2_unintended_diff_only": 0.005599990487098694, |
| "tpp_threshold_5_total_metric": 0.04669999778270721, |
| "tpp_threshold_5_intended_diff_only": 0.05799999237060547, |
| "tpp_threshold_5_unintended_diff_only": 0.011299994587898255, |
| "tpp_threshold_10_total_metric": 0.09595000445842743, |
| "tpp_threshold_10_intended_diff_only": 0.11399999856948853, |
| "tpp_threshold_10_unintended_diff_only": 0.018049994111061098, |
| "tpp_threshold_20_total_metric": 0.16720001697540282, |
| "tpp_threshold_20_intended_diff_only": 0.19140000343322755, |
| "tpp_threshold_20_unintended_diff_only": 0.02419998645782471, |
| "tpp_threshold_50_total_metric": 0.2851000130176544, |
| "tpp_threshold_50_intended_diff_only": 0.32060000896453855, |
| "tpp_threshold_50_unintended_diff_only": 0.035499995946884154, |
| "tpp_threshold_100_total_metric": 0.3482500106096268, |
| "tpp_threshold_100_intended_diff_only": 0.3892000079154968, |
| "tpp_threshold_100_unintended_diff_only": 0.04094999730587005, |
| "tpp_threshold_500_total_metric": 0.34160002768039704, |
| "tpp_threshold_500_intended_diff_only": 0.42480002641677855, |
| "tpp_threshold_500_unintended_diff_only": 0.08319999873638154 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.7.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.7.hook_resid_post", |
| "hook_layer": 7, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.16124998033046722, |
| "tpp_threshold_2_intended_diff_only": 0.20499998331069946, |
| "tpp_threshold_2_unintended_diff_only": 0.04375000298023224, |
| "tpp_threshold_5_total_metric": 0.1862499713897705, |
| "tpp_threshold_5_intended_diff_only": 0.2749999761581421, |
| "tpp_threshold_5_unintended_diff_only": 0.08875000476837158, |
| "tpp_threshold_10_total_metric": 0.21049998700618744, |
| "tpp_threshold_10_intended_diff_only": 0.3529999852180481, |
| "tpp_threshold_10_unintended_diff_only": 0.14249999821186066, |
| "tpp_threshold_20_total_metric": 0.22074995934963226, |
| "tpp_threshold_20_intended_diff_only": 0.39499998092651367, |
| "tpp_threshold_20_unintended_diff_only": 0.1742500215768814, |
| "tpp_threshold_50_total_metric": 0.16399997472763062, |
| "tpp_threshold_50_intended_diff_only": 0.4350000023841858, |
| "tpp_threshold_50_unintended_diff_only": 0.2710000276565552, |
| "tpp_threshold_100_total_metric": 0.12499997019767761, |
| "tpp_threshold_100_intended_diff_only": 0.44999998807907104, |
| "tpp_threshold_100_unintended_diff_only": 0.32500001788139343, |
| "tpp_threshold_500_total_metric": 0.06700000166893005, |
| "tpp_threshold_500_intended_diff_only": 0.4520000219345093, |
| "tpp_threshold_500_unintended_diff_only": 0.3850000202655792 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.042999982833862305, |
| "tpp_threshold_2_intended_diff_only": 0.06099998950958252, |
| "tpp_threshold_2_unintended_diff_only": 0.018000006675720215, |
| "tpp_threshold_5_total_metric": 0.13574999570846558, |
| "tpp_threshold_5_intended_diff_only": 0.1850000023841858, |
| "tpp_threshold_5_unintended_diff_only": 0.049250006675720215, |
| "tpp_threshold_10_total_metric": 0.1717500239610672, |
| "tpp_threshold_10_intended_diff_only": 0.22100001573562622, |
| "tpp_threshold_10_unintended_diff_only": 0.04924999177455902, |
| "tpp_threshold_20_total_metric": 0.23475003242492676, |
| "tpp_threshold_20_intended_diff_only": 0.36400002241134644, |
| "tpp_threshold_20_unintended_diff_only": 0.12924998998641968, |
| "tpp_threshold_50_total_metric": 0.17875003814697266, |
| "tpp_threshold_50_intended_diff_only": 0.4180000424385071, |
| "tpp_threshold_50_unintended_diff_only": 0.23925000429153442, |
| "tpp_threshold_100_total_metric": 0.1589999943971634, |
| "tpp_threshold_100_intended_diff_only": 0.4490000009536743, |
| "tpp_threshold_100_unintended_diff_only": 0.2900000065565109, |
| "tpp_threshold_500_total_metric": 0.08075003325939178, |
| "tpp_threshold_500_intended_diff_only": 0.46400004625320435, |
| "tpp_threshold_500_unintended_diff_only": 0.38325001299381256 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.01349996030330658, |
| "tpp_threshold_2_intended_diff_only": 0.01699995994567871, |
| "tpp_threshold_2_unintended_diff_only": 0.0034999996423721313, |
| "tpp_threshold_5_total_metric": 0.058999985456466675, |
| "tpp_threshold_5_intended_diff_only": 0.06599998474121094, |
| "tpp_threshold_5_unintended_diff_only": 0.006999999284744263, |
| "tpp_threshold_10_total_metric": 0.3167499750852585, |
| "tpp_threshold_10_intended_diff_only": 0.36399996280670166, |
| "tpp_threshold_10_unintended_diff_only": 0.047249987721443176, |
| "tpp_threshold_20_total_metric": 0.322749987244606, |
| "tpp_threshold_20_intended_diff_only": 0.4269999861717224, |
| "tpp_threshold_20_unintended_diff_only": 0.1042499989271164, |
| "tpp_threshold_50_total_metric": 0.25624996423721313, |
| "tpp_threshold_50_intended_diff_only": 0.4559999704360962, |
| "tpp_threshold_50_unintended_diff_only": 0.19975000619888306, |
| "tpp_threshold_100_total_metric": 0.18050000071525574, |
| "tpp_threshold_100_intended_diff_only": 0.4570000171661377, |
| "tpp_threshold_100_unintended_diff_only": 0.27650001645088196, |
| "tpp_threshold_500_total_metric": 0.07400000095367432, |
| "tpp_threshold_500_intended_diff_only": 0.4570000171661377, |
| "tpp_threshold_500_unintended_diff_only": 0.3830000162124634 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.004750058054924011, |
| "tpp_threshold_2_intended_diff_only": 0.006000041961669922, |
| "tpp_threshold_2_unintended_diff_only": 0.0012499839067459106, |
| "tpp_threshold_5_total_metric": 0.08900001645088196, |
| "tpp_threshold_5_intended_diff_only": 0.10500001907348633, |
| "tpp_threshold_5_unintended_diff_only": 0.01600000262260437, |
| "tpp_threshold_10_total_metric": 0.4017500728368759, |
| "tpp_threshold_10_intended_diff_only": 0.4540000557899475, |
| "tpp_threshold_10_unintended_diff_only": 0.052249982953071594, |
| "tpp_threshold_20_total_metric": 0.35725007951259613, |
| "tpp_threshold_20_intended_diff_only": 0.48100006580352783, |
| "tpp_threshold_20_unintended_diff_only": 0.1237499862909317, |
| "tpp_threshold_50_total_metric": 0.29475001990795135, |
| "tpp_threshold_50_intended_diff_only": 0.48900002241134644, |
| "tpp_threshold_50_unintended_diff_only": 0.19425000250339508, |
| "tpp_threshold_100_total_metric": 0.2370000034570694, |
| "tpp_threshold_100_intended_diff_only": 0.49400001764297485, |
| "tpp_threshold_100_unintended_diff_only": 0.25700001418590546, |
| "tpp_threshold_500_total_metric": 0.12500004470348358, |
| "tpp_threshold_500_intended_diff_only": 0.49500006437301636, |
| "tpp_threshold_500_unintended_diff_only": 0.3700000196695328 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.003750056028366089, |
| "tpp_threshold_2_intended_diff_only": 0.006000041961669922, |
| "tpp_threshold_2_unintended_diff_only": 0.002249985933303833, |
| "tpp_threshold_5_total_metric": 0.10675004124641418, |
| "tpp_threshold_5_intended_diff_only": 0.1260000467300415, |
| "tpp_threshold_5_unintended_diff_only": 0.01925000548362732, |
| "tpp_threshold_10_total_metric": 0.25950005650520325, |
| "tpp_threshold_10_intended_diff_only": 0.3240000605583191, |
| "tpp_threshold_10_unintended_diff_only": 0.06450000405311584, |
| "tpp_threshold_20_total_metric": 0.27250003814697266, |
| "tpp_threshold_20_intended_diff_only": 0.44200003147125244, |
| "tpp_threshold_20_unintended_diff_only": 0.16949999332427979, |
| "tpp_threshold_50_total_metric": 0.23100006580352783, |
| "tpp_threshold_50_intended_diff_only": 0.48100006580352783, |
| "tpp_threshold_50_unintended_diff_only": 0.25, |
| "tpp_threshold_100_total_metric": 0.16725006699562073, |
| "tpp_threshold_100_intended_diff_only": 0.48100006580352783, |
| "tpp_threshold_100_unintended_diff_only": 0.3137499988079071, |
| "tpp_threshold_500_total_metric": 0.0870000571012497, |
| "tpp_threshold_500_intended_diff_only": 0.48100006580352783, |
| "tpp_threshold_500_unintended_diff_only": 0.39400000870227814 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.00625002384185791, |
| "tpp_threshold_2_intended_diff_only": 0.009000003337860107, |
| "tpp_threshold_2_unintended_diff_only": 0.0027499794960021973, |
| "tpp_threshold_5_total_metric": 0.004500031471252441, |
| "tpp_threshold_5_intended_diff_only": 0.008000016212463379, |
| "tpp_threshold_5_unintended_diff_only": 0.0034999847412109375, |
| "tpp_threshold_10_total_metric": 0.014250040054321289, |
| "tpp_threshold_10_intended_diff_only": 0.021000027656555176, |
| "tpp_threshold_10_unintended_diff_only": 0.006749987602233887, |
| "tpp_threshold_20_total_metric": 0.06700003147125244, |
| "tpp_threshold_20_intended_diff_only": 0.08300000429153442, |
| "tpp_threshold_20_unintended_diff_only": 0.015999972820281982, |
| "tpp_threshold_50_total_metric": 0.23800000548362732, |
| "tpp_threshold_50_intended_diff_only": 0.2580000162124634, |
| "tpp_threshold_50_unintended_diff_only": 0.02000001072883606, |
| "tpp_threshold_100_total_metric": 0.3327500522136688, |
| "tpp_threshold_100_intended_diff_only": 0.3630000352859497, |
| "tpp_threshold_100_unintended_diff_only": 0.030249983072280884, |
| "tpp_threshold_500_total_metric": 0.3972499966621399, |
| "tpp_threshold_500_intended_diff_only": 0.4490000009536743, |
| "tpp_threshold_500_unintended_diff_only": 0.051750004291534424 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.025749996304512024, |
| "tpp_threshold_2_intended_diff_only": 0.02799999713897705, |
| "tpp_threshold_2_unintended_diff_only": 0.002250000834465027, |
| "tpp_threshold_5_total_metric": 0.0639999657869339, |
| "tpp_threshold_5_intended_diff_only": 0.09999996423721313, |
| "tpp_threshold_5_unintended_diff_only": 0.035999998450279236, |
| "tpp_threshold_10_total_metric": 0.15549999475479126, |
| "tpp_threshold_10_intended_diff_only": 0.19999998807907104, |
| "tpp_threshold_10_unintended_diff_only": 0.044499993324279785, |
| "tpp_threshold_20_total_metric": 0.250249981880188, |
| "tpp_threshold_20_intended_diff_only": 0.31199997663497925, |
| "tpp_threshold_20_unintended_diff_only": 0.06174999475479126, |
| "tpp_threshold_50_total_metric": 0.3554999977350235, |
| "tpp_threshold_50_intended_diff_only": 0.43400001525878906, |
| "tpp_threshold_50_unintended_diff_only": 0.07850001752376556, |
| "tpp_threshold_100_total_metric": 0.35224999487400055, |
| "tpp_threshold_100_intended_diff_only": 0.4399999976158142, |
| "tpp_threshold_100_unintended_diff_only": 0.08775000274181366, |
| "tpp_threshold_500_total_metric": 0.32875002920627594, |
| "tpp_threshold_500_intended_diff_only": 0.44300001859664917, |
| "tpp_threshold_500_unintended_diff_only": 0.11424998939037323 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": -0.0014999806880950928, |
| "tpp_threshold_2_intended_diff_only": 0.0040000081062316895, |
| "tpp_threshold_2_unintended_diff_only": 0.005499988794326782, |
| "tpp_threshold_5_total_metric": -0.0017499923706054688, |
| "tpp_threshold_5_intended_diff_only": 0.0, |
| "tpp_threshold_5_unintended_diff_only": 0.0017499923706054688, |
| "tpp_threshold_10_total_metric": 0.016249969601631165, |
| "tpp_threshold_10_intended_diff_only": 0.019999980926513672, |
| "tpp_threshold_10_unintended_diff_only": 0.0037500113248825073, |
| "tpp_threshold_20_total_metric": 0.03250002861022949, |
| "tpp_threshold_20_intended_diff_only": 0.04500001668930054, |
| "tpp_threshold_20_unintended_diff_only": 0.012499988079071045, |
| "tpp_threshold_50_total_metric": 0.1834999918937683, |
| "tpp_threshold_50_intended_diff_only": 0.20999997854232788, |
| "tpp_threshold_50_unintended_diff_only": 0.02649998664855957, |
| "tpp_threshold_100_total_metric": 0.30574996769428253, |
| "tpp_threshold_100_intended_diff_only": 0.3399999737739563, |
| "tpp_threshold_100_unintended_diff_only": 0.03425000607967377, |
| "tpp_threshold_500_total_metric": 0.26725003123283386, |
| "tpp_threshold_500_intended_diff_only": 0.4190000295639038, |
| "tpp_threshold_500_unintended_diff_only": 0.15174999833106995 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.02350001037120819, |
| "tpp_threshold_2_intended_diff_only": 0.02799999713897705, |
| "tpp_threshold_2_unintended_diff_only": 0.00449998676776886, |
| "tpp_threshold_5_total_metric": 0.04124996066093445, |
| "tpp_threshold_5_intended_diff_only": 0.04799997806549072, |
| "tpp_threshold_5_unintended_diff_only": 0.006750017404556274, |
| "tpp_threshold_10_total_metric": 0.10300001502037048, |
| "tpp_threshold_10_intended_diff_only": 0.11599999666213989, |
| "tpp_threshold_10_unintended_diff_only": 0.01299998164176941, |
| "tpp_threshold_20_total_metric": 0.22349998354911804, |
| "tpp_threshold_20_intended_diff_only": 0.23399996757507324, |
| "tpp_threshold_20_unintended_diff_only": 0.0104999840259552, |
| "tpp_threshold_50_total_metric": 0.3122500032186508, |
| "tpp_threshold_50_intended_diff_only": 0.32999998331069946, |
| "tpp_threshold_50_unintended_diff_only": 0.017749980092048645, |
| "tpp_threshold_100_total_metric": 0.40849997103214264, |
| "tpp_threshold_100_intended_diff_only": 0.4229999780654907, |
| "tpp_threshold_100_unintended_diff_only": 0.014500007033348083, |
| "tpp_threshold_500_total_metric": 0.38700002431869507, |
| "tpp_threshold_500_intended_diff_only": 0.43300002813339233, |
| "tpp_threshold_500_unintended_diff_only": 0.046000003814697266 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.0780000239610672, |
| "tpp_threshold_2_intended_diff_only": 0.0910000205039978, |
| "tpp_threshold_2_unintended_diff_only": 0.012999996542930603, |
| "tpp_threshold_5_total_metric": 0.12550002336502075, |
| "tpp_threshold_5_intended_diff_only": 0.1340000033378601, |
| "tpp_threshold_5_unintended_diff_only": 0.008499979972839355, |
| "tpp_threshold_10_total_metric": 0.19075000286102295, |
| "tpp_threshold_10_intended_diff_only": 0.21299999952316284, |
| "tpp_threshold_10_unintended_diff_only": 0.022249996662139893, |
| "tpp_threshold_20_total_metric": 0.2627500593662262, |
| "tpp_threshold_20_intended_diff_only": 0.28300005197525024, |
| "tpp_threshold_20_unintended_diff_only": 0.020249992609024048, |
| "tpp_threshold_50_total_metric": 0.33625006675720215, |
| "tpp_threshold_50_intended_diff_only": 0.3710000514984131, |
| "tpp_threshold_50_unintended_diff_only": 0.03474998474121094, |
| "tpp_threshold_100_total_metric": 0.3420000672340393, |
| "tpp_threshold_100_intended_diff_only": 0.3800000548362732, |
| "tpp_threshold_100_unintended_diff_only": 0.03799998760223389, |
| "tpp_threshold_500_total_metric": 0.3277500569820404, |
| "tpp_threshold_500_intended_diff_only": 0.3800000548362732, |
| "tpp_threshold_500_unintended_diff_only": 0.05224999785423279 |
| } |
| } |
| } |
| } |